This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

localedef fixes for translit parsing



localedef currently has three problems relating to the parsing of
transliteration definitions:

  * It does not allow more than one include statement. But I don't see in the
    ISO/IEC TR 14652 draft any wording prohibiting two or more include statements.
    And it is quite useful for structuring the transliteration definitions
    and letting the locale maintainers choose the subsets they wish.

  * An include statement copies the translit_t list, but not the
    default_missing definition.

  * When a locale with a translit section is parsed in ignore_content=true
    mode, an error is signalled at the first nonempty line after the
    "translit_start" line (normally an include directive or a transliteration
    definition statement).

This patch fixes these and adds a better error message for the case that
the "translit_end" keyword is missing.


2000-10-29  Bruno Haible  <haible@clisp.cons.org>

	* locale/programs/ld-ctype.c (translit_include_t): New type.
	(locale_ctype_t): Remove translit_copy_locale, translit_copy_repertoire
	fields, add translit_include field.
	(ctype_read): After seeing "translit_start" in ignore_content mode,
	skip the entire translit section, not only one line. Remove dead test
	for tok_translit_end. Enqueue include statements into a list.
	Signal error if tok_eof seen before tok_translit_end.
	(translit_flatten): New function.
	(allocate_arrays): Call translit_flatten.

*** glibc-20001010/locale/programs/ld-ctype.c.bak	Mon Oct  2 16:09:25 2000
--- glibc-20001010/locale/programs/ld-ctype.c	Mon Oct 30 01:21:09 2000
***************
*** 106,111 ****
--- 106,121 ----
  };
  
  
+ /* Type to describe a transliteration include statement.  */
+ struct translit_include_t 
+ {
+   const char *copy_locale;
+   const char *copy_repertoire;
+ 
+   struct translit_include_t *next;
+ };
+ 
+ 
  /* The real definition of the struct for the LC_CTYPE locale.  */
  struct locale_ctype_t
  {
***************
*** 155,162 ****
    uint32_t map_offset;
  
    /* Transliteration information.  */
!   const char *translit_copy_locale;
!   const char *translit_copy_repertoire;
    struct translit_t *translit;
    struct translit_ignore_t *translit_ignore;
    uint32_t ntranslit_ignore;
--- 165,171 ----
    uint32_t map_offset;
  
    /* Transliteration information.  */
!   struct translit_include_t *translit_include;
    struct translit_t *translit;
    struct translit_ignore_t *translit_ignore;
    uint32_t ntranslit_ignore;
***************
*** 2609,2619 ****
  	  break;
  
  	case tok_translit_start:
! 	  /* Ignore the rest of the line if we don't need the input of
! 	     this line.  */
  	  if (ignore_content)
  	    {
! 	      lr_ignore_rest (ldfile, 0);
  	      break;
  	    }
  
--- 2618,2639 ----
  	  break;
  
  	case tok_translit_start:
! 	  /* Ignore the entire translit section with its peculiar syntax
! 	     if we don't need the input.  */
  	  if (ignore_content)
  	    {
! 	      do
! 		{
! 		  lr_ignore_rest (ldfile, 0);
! 		  now = lr_token (ldfile, charmap, NULL);
! 		}
! 	      while (now->tok != tok_translit_end && now->tok != tok_eof);
! 
! 	      if (now->tok == tok_eof)
! 		lr_error (ldfile, _(\
! "%s: `translit_start' section does not end with `translit_end'"),
! 			  "LC_CTYPE");
! 
  	      break;
  	    }
  
***************
*** 2635,2651 ****
  		/* Ignore empty lines.  */
  		continue;
  
- 	      if (now->tok == tok_translit_end)
- 		{
- 		  lr_ignore_rest (ldfile, 0);
- 		  break;
- 		}
- 
  	      if (now->tok == tok_include)
  		{
  		  /* We have to include locale.  */
  		  const char *locale_name;
  		  const char *repertoire_name;
  
  		  now = lr_token (ldfile, charmap, NULL);
  		  /* This should be a string or an identifier.  In any
--- 2655,2666 ----
  		/* Ignore empty lines.  */
  		continue;
  
  	      if (now->tok == tok_include)
  		{
  		  /* We have to include locale.  */
  		  const char *locale_name;
  		  const char *repertoire_name;
+ 		  struct translit_include_t *include_stmt, **include_ptr;
  
  		  now = lr_token (ldfile, charmap, NULL);
  		  /* This should be a string or an identifier.  In any
***************
*** 2671,2694 ****
  		    goto translit_syntax;
  		  repertoire_name = now->val.str.startmb;
  
! 		  /* We must not have more than one `include'.  */
! 		  if (ctype->translit_copy_locale != NULL)
! 		    {
! 		      lr_error (ldfile, _("\
! %s: only one `include' instruction allowed"), "LC_CTYPE");
! 		      lr_ignore_rest (ldfile, 0);
! 		      continue;
! 		    }
! 
! 		  ctype->translit_copy_locale = locale_name;
! 		  ctype->translit_copy_repertoire = repertoire_name;
  
  		  /* The rest of the line must be empty.  */
  		  lr_ignore_rest (ldfile, 1);
  
  		  /* Make sure the locale is read.  */
! 		  add_to_readlist (LC_CTYPE, ctype->translit_copy_locale,
! 				   repertoire_name, 1, NULL);
  		  continue;
  		}
  	      else if (now->tok == tok_default_missing)
--- 2686,2709 ----
  		    goto translit_syntax;
  		  repertoire_name = now->val.str.startmb;
  
! 		  /* Save the include statement for later processing.  */
! 		  include_stmt = (struct translit_include_t *)
! 		    xmalloc (sizeof (struct translit_include_t));
! 		  include_stmt->copy_locale = locale_name;
! 		  include_stmt->copy_repertoire = repertoire_name;
! 		  include_stmt->next = NULL;
! 
! 		  include_ptr = &ctype->translit_include;
! 		  while (*include_ptr != NULL)
! 		    include_ptr = &(*include_ptr)->next;
! 		  *include_ptr = include_stmt;
  
  		  /* The rest of the line must be empty.  */
  		  lr_ignore_rest (ldfile, 1);
  
  		  /* Make sure the locale is read.  */
! 		  add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
! 				   1, NULL);
  		  continue;
  		}
  	      else if (now->tok == tok_default_missing)
***************
*** 2753,2758 ****
--- 2768,2779 ----
  	      read_translit_entry (ldfile, ctype, now, charmap, repertoire);
  	    }
  	  ldfile->return_widestr = 0;
+ 
+ 	  if (now->tok == tok_eof)
+ 	    lr_error (ldfile, _(\
+ "%s: `translit_start' section does not end with `translit_end'"),
+ 		      "LC_CTYPE");
+ 
  	  break;
  
  	case tok_ident:
***************
*** 3591,3596 ****
--- 3612,3673 ----
  }
  
  
+ /* Flattens the included transliterations into a translit list.
+    Inserts them in the list at `cursor', and returns the new cursor.  */
+ static struct translit_t **
+ translit_flatten (struct locale_ctype_t *ctype, struct charmap_t *charmap,
+ 		  struct translit_t **cursor)
+ {
+   while (ctype->translit_include != NULL)
+     {
+       const char *copy_locale = ctype->translit_include->copy_locale;
+       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
+       struct localedef_t *other;
+ 
+       /* Unchain the include statement.  During the depth-first traversal
+ 	 we don't want to visit any locale more than once.  */
+       ctype->translit_include = ctype->translit_include->next;
+ 
+       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
+ 
+       if (other == NULL)
+ 	{
+ 	  error (0, 0, _("\
+ %s: transliteration data from locale `%s' not available"),
+ 		 "LC_CTYPE", copy_locale);
+ 	}
+       else
+ 	{
+ 	  struct locale_ctype_t *other_ctype =
+ 	    other->categories[LC_CTYPE].ctype;
+ 
+ 	  cursor = translit_flatten (other_ctype, charmap, cursor);
+ 	  assert (other_ctype->translit_include == NULL);
+ 
+ 	  if (other_ctype->translit != NULL)
+ 	    {
+ 	      /* Insert the other_ctype->translit list at *cursor.  */
+ 	      struct translit_t *endp = other_ctype->translit;
+ 	      while (endp->next != NULL)
+ 		endp = endp->next;
+ 
+ 	      endp->next = *cursor;
+ 	      *cursor = other_ctype->translit;
+ 
+ 	      /* Avoid any risk of circular lists.  */
+ 	      other_ctype->translit = NULL;
+ 
+ 	      cursor = &endp->next;
+ 	    }
+ 
+ 	  if (ctype->default_missing == NULL)
+ 	    ctype->default_missing = other_ctype->default_missing;
+ 	}
+     }
+ 
+   return cursor;
+ }
+ 
  static void
  allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
  		 struct repertoire_t *repertoire)
***************
*** 3867,3907 ****
       complicated algorithm which uses a hash table to locate the entries.
       For now I'll use a simple array which can be searching using binary
       search.  */
!   if (ctype->translit_copy_locale != NULL)
!     {
!       /* Fold in the transliteration information from the locale mentioned
! 	 in the `include' statement.  */
!       struct locale_ctype_t *here = ctype;
! 
!       do
! 	{
! 	  struct localedef_t *other = find_locale (LC_CTYPE,
! 						   here->translit_copy_locale,
! 						   repertoire->name, charmap);
! 
! 	  if (other == NULL)
! 	    {
! 	      error (0, 0, _("\
! %s: transliteration data from locale `%s' not available"),
! 		     "LC_CTYPE", here->translit_copy_locale);
! 	      break;
! 	    }
! 
! 	  here = other->categories[LC_CTYPE].ctype;
! 
! 	  /* Enqueue the information if necessary.  */
! 	  if (here->translit != NULL)
! 	    {
! 	      struct translit_t *endp = here->translit;
! 	      while (endp->next != NULL)
! 		endp = endp->next;
! 
! 	      endp->next = ctype->translit;
! 	      ctype->translit = here->translit;
! 	    }
! 	}
!       while (here->translit_copy_locale != NULL);
!     }
  
    if (ctype->translit != NULL)
      {
--- 3944,3953 ----
       complicated algorithm which uses a hash table to locate the entries.
       For now I'll use a simple array which can be searching using binary
       search.  */
!   if (ctype->translit_include != NULL)
!     /* Traverse the locales mentioned in the `include' statements in a
!        depth-first way and fold in their transliteration information.  */
!     translit_flatten (ctype, charmap, &ctype->translit);
  
    if (ctype->translit != NULL)
      {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]