This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Unicode 3.1 support (1)



This is the second patch for supporting Unicode 3.1 in libc. (The first one
was the UTF-32 encoding.)

While Unicode 3.0 ended at U+FFFD, Unicode 3.1 extends up to U+10FFFD.

First, there was a special hack in localedef for speeding up the find_idx
function for values < 0x10000. Here is a patch which uses the 3-level table
data structure to extend this up to 0xFFFFFFFF. It doesn't make a measurable
speed difference versus the old hack. Without this, the generation of a
UTF-8 locale doesn't finish within a minute.

Second, without use of the ".." notation, the UTF-8 and GB18030 charmaps
would each become larger than 9 MB. So, to keep these charmaps small, the
patch first fixes a bug in the ".." treatment: the ->ucs4 value was always
set to UNINITIALIZED_CHAR_VALUE.


2001-05-29  Bruno Haible  <haible@clisp.cons.org>

	* locale/programs/ld-ctype.c (idx_table): New struct type.
	(idx_table_init, idx_table_get, idx_table_add): New functions.
	(MAX_CHARNAMES_IDX): Remove macro.
	(locale_ctype_t): Change type of charnames_idx field.
	(ctype_startup): Change initialization of charnames_idx field.
	(find_idx): Use idx_table_get and idx_table_add for speed.

	* locale/programs/charmap.c (charmap_new_char): Fix ucs4 value
	computation of characters in a range.

--- glibc-20010430/locale/programs/ld-ctype.c.bak	Tue Feb  6 14:39:10 2001
+++ glibc-20010430/locale/programs/ld-ctype.c	Sun May 27 12:23:24 2001
@@ -116,6 +116,14 @@
 };
 
 
+/* Sparse table of uint32_t.  */
+#define TABLE idx_table
+#define ELEMENT uint32_t
+#define DEFAULT ~((uint32_t) 0)
+#define NO_FINALIZE
+#include "3level.h"
+
+
 /* The real definition of the struct for the LC_CTYPE locale.  */
 struct locale_ctype_t
 {
@@ -123,8 +131,7 @@
   size_t charnames_max;
   size_t charnames_act;
   /* An index lookup table, to speedup find_idx.  */
-#define MAX_CHARNAMES_IDX 0x10000
-  uint32_t *charnames_idx;
+  struct idx_table charnames_idx;
 
   struct repertoire_t *repertoire;
 
@@ -261,10 +268,7 @@
 	  for (cnt = 0; cnt < 256; ++cnt)
 	    ctype->charnames[cnt] = cnt;
 	  ctype->charnames_act = 256;
-	  ctype->charnames_idx =
-	    (uint32_t *) xmalloc (MAX_CHARNAMES_IDX * sizeof (uint32_t));
-	  for (cnt = 0; cnt < MAX_CHARNAMES_IDX; ++cnt)
-	    ctype->charnames_idx[cnt] = ~((uint32_t) 0);
+	  idx_table_init (&ctype->charnames_idx);
 
 	  /* Fill character class information.  */
 	  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
@@ -1280,23 +1284,17 @@
   if (idx < 256)
     return table == NULL ? NULL : &(*table)[idx];
 
-  /* If idx is in the usual range, use the charnames_idx lookup table
-     instead of the slow search loop.  */
-  if (idx < MAX_CHARNAMES_IDX)
-    {
-      if (ctype->charnames_idx[idx] != ~((uint32_t) 0))
-	/* Found.  */
-	cnt = ctype->charnames_idx[idx];
-      else
-	/* Not found.  */
-	cnt = ctype->charnames_act;
-    }
-  else
-    {
-      for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
-	if (ctype->charnames[cnt] == idx)
-	  break;
-    }
+  /* Use the charnames_idx lookup table instead of the slow search loop.  */
+#if 1
+  cnt = idx_table_get (&ctype->charnames_idx, idx);
+  if (cnt == ~((uint32_t) 0))
+    /* Not found.  */
+    cnt = ctype->charnames_act;
+#else
+  for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
+    if (ctype->charnames[cnt] == idx)
+      break;
+#endif
 
   /* We have to distinguish two cases: the name is found or not.  */
   if (cnt == ctype->charnames_act)
@@ -1310,8 +1308,7 @@
 		      sizeof (uint32_t) * ctype->charnames_max);
 	}
       ctype->charnames[ctype->charnames_act++] = idx;
-      if (idx < MAX_CHARNAMES_IDX)
-	ctype->charnames_idx[idx] = cnt;
+      idx_table_add (&ctype->charnames_idx, idx, cnt);
     }
 
   if (table == NULL)
--- glibc-20010430/locale/programs/charmap.c.bak	Thu Apr  5 22:19:13 2001
+++ glibc-20010430/locale/programs/charmap.c	Tue May 29 01:19:27 2001
@@ -1042,7 +1042,7 @@
 	  char *endp;
 
 	  errno = 0;
-	  newp->ucs4 = strtoul (name_end, &endp, 16);
+	  newp->ucs4 = strtoul (name_end + 1, &endp, 16);
 	  if (endp - name_end != len1
 	      || (newp->ucs4 == ULONG_MAX && errno == ERANGE)
 	      || newp->ucs4 >= 0x80000000)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]