This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: a smaller LC_CTYPE locale format



One more place must be updated for the new locale format: fnmatch.
The comments there say that fnmatch's speed in 8-bit locales depends on
a fast
      iswctype (btowc (c), desc)
operation that does not go through btowc and gconv. Therefore I add a
small 32-byte table (256 bits, one per `unsigned char' value) for this
in front of each wctype_t table.


2000-07-27  Bruno Haible  <haible@clisp.cons.org>

	* locale/localeinfo.h (_ISCTYPE): New macro.
	* posix/fnmatch_loop.c (internal_fnmatch): Use it, support new
	LC_CTYPE locale format.
	* locale/programs/ld-ctype.c (locale_ctype_t): New field class_b.
	(ctype_output): Output class_b[nr] right before class_3level[nr].
	(allocate_arrays): Fill class_b, similarly to class_3level.

*** glibc-20000724/locale/localeinfo.h.bak	Mon Jul  3 16:39:31 2000
--- glibc-20000724/locale/localeinfo.h	Fri Jul 28 01:00:21 2000
***************
*** 121,126 ****
--- 121,134 ----
  };
  
  
+ /* LC_CTYPE specific:
+    Access a wide character class with a single character index.
+    _ISCTYPE (c, desc) = iswctype (btowc (c), desc).
+    c must be an `unsigned char'.  desc must be a nonzero wctype_t.  */
+ #define _ISCTYPE(c, desc) \
+   (((((const uint32_t *) (desc)) - 8)[(c) >> 5] >> ((c) & 0x1f)) & 1)
+ 
+ 
  /* For each category declare the variable for the current locale data.  */
  #define DEFINE_CATEGORY(category, category_name, items, a) \
  extern struct locale_data *_nl_current_##category;
*** glibc-20000724/posix/fnmatch_loop.c.bak	Thu Jul  6 17:36:53 2000
--- glibc-20000724/posix/fnmatch_loop.c	Fri Jul 28 00:38:53 2000
***************
*** 256,290 ****
  		      /* Invalid character class name.  */
  		      return FNM_NOMATCH;
  
- 		    /* The following code is glibc specific but does
- 		       there a good job in sppeding up the code since
- 		       we can avoid the btowc() call.  The
- 		       IS_CHAR_CLASS call will return a bit mask for
- 		       the 32-bit table.  We have to convert it to a
- 		       bitmask for the __ctype_b table.  This has to
- 		       be done based on the byteorder as can be seen
- 		       below.  In any case we will fall back on the
- 		       code using btowc() if the class is not one of
- 		       the standard classes.  */
  # if defined _LIBC && ! WIDE_CHAR_VERSION
! #  if __BYTE_ORDER == __LITTLE_ENDIAN
! 		    if ((wt & 0xf0ffff) == 0)
  		      {
! 			wt >>= 16;
! 			if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! 			  goto matched;
! 		      }
  #  else
! 		    if (wt <= 0x800)
  		      {
! 			if ((__ctype_b[(UCHAR) *n] & wt) != 0)
  			  goto matched;
  		      }
! #  endif
! 		    else
  # endif
- 		      if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
- 			goto matched;
  #else
  		    if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
  			|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
--- 256,296 ----
  		      /* Invalid character class name.  */
  		      return FNM_NOMATCH;
  
  # if defined _LIBC && ! WIDE_CHAR_VERSION
! 		    /* The following code is glibc specific but does
! 		       a good job of speeding up the code since
! 		       we can avoid the btowc() call.  */
! 		    if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_HASH_SIZE) != 0)
  		      {
! 			/* Old locale format.  */
! #  if __BYTE_ORDER == __LITTLE_ENDIAN
! 			if ((wt & 0xf0ffff) == 0)
! 			  {
! 			    wt >>= 16;
! 			    if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! 			      goto matched;
! 			  }
  #  else
! 			if (wt <= 0x800)
! 			  {
! 			    if ((__ctype_b[(UCHAR) *n] & wt) != 0)
! 			      goto matched;
! 			  }
! #  endif
! 			else
! 			  if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
! 			    goto matched;
! 		      }
! 		    else
  		      {
! 			/* New locale format.  */
! 			if (_ISCTYPE ((UCHAR) *n, wt))
  			  goto matched;
  		      }
! # else
! 		    if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
! 		      goto matched;
  # endif
  #else
  		    if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
  			|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
*** glibc-20000724/locale/programs/ld-ctype.c.bak	Tue Jul 25 01:53:16 2000
--- glibc-20000724/locale/programs/ld-ctype.c	Fri Jul 28 02:18:12 2000
***************
*** 170,175 ****
--- 170,176 ----
    uint32_t *names;
    uint32_t **map;
    uint32_t **map32;
+   uint32_t **class_b;
    struct iovec *class_3level;
    struct iovec *map_3level;
    uint32_t *class_name_ptr;
***************
*** 842,848 ****
  			 + (oldstyle_tables
  			    ? (ctype->map_collection_nr - 2)
  			    : (ctype->nr_charclass + ctype->map_collection_nr)));
!   struct iovec iov[2 + nelems + ctype->nr_charclass
  		  + ctype->map_collection_nr + 2];
    struct locale_file data;
    uint32_t idx[nelems + 1];
--- 843,849 ----
  			 + (oldstyle_tables
  			    ? (ctype->map_collection_nr - 2)
  			    : (ctype->nr_charclass + ctype->map_collection_nr)));
!   struct iovec iov[2 + nelems + 2 * ctype->nr_charclass
  		  + ctype->map_collection_nr + 2];
    struct locale_file data;
    uint32_t idx[nelems + 1];
***************
*** 1169,1174 ****
--- 1170,1181 ----
  	      size_t nr = elem - _NL_ITEM_INDEX (_NL_NUM_LC_CTYPE);
  	      if (nr < ctype->nr_charclass)
  		{
+ 		  iov[2 + elem + offset].iov_base = ctype->class_b[nr];
+ 		  iov[2 + elem + offset].iov_len = 256 / 32
+ 						   * sizeof (uint32_t);
+ 		  idx[elem] += iov[2 + elem + offset].iov_len;
+ 		  ++offset;
+ 
  		  iov[2 + elem + offset] = ctype->class_3level[nr];
  		}
  	      else
***************
*** 1182,1188 ****
  	}
      }
  
!   assert (2 + elem + offset == (nelems + ctype->nr_charclass
  				+ ctype->map_collection_nr + 2 + 2));
  
    write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
--- 1189,1195 ----
  	}
      }
  
!   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
  				+ ctype->map_collection_nr + 2 + 2));
  
    write_locale_data (output_path, "LC_CTYPE", 2 + elem + offset, iov);
***************
*** 4060,4067 ****
      xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
  	     sizeof (char_class32_t));
    if (!oldstyle_tables)
!     ctype->class_3level = (struct iovec *)
!       xmalloc (ctype->nr_charclass * sizeof (struct iovec));
  
    /* This is the array accessed using the multibyte string elements.  */
    for (idx = 0; idx < 256; ++idx)
--- 4067,4078 ----
      xcalloc ((oldstyle_tables ? ctype->plane_size * ctype->plane_cnt : 256),
  	     sizeof (char_class32_t));
    if (!oldstyle_tables)
!     {
!       ctype->class_b = (uint32_t **)
! 	xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
!       ctype->class_3level = (struct iovec *)
! 	xmalloc (ctype->nr_charclass * sizeof (struct iovec));
!     }
  
    /* This is the array accessed using the multibyte string elements.  */
    for (idx = 0; idx < 256; ++idx)
***************
*** 4089,4094 ****
--- 4100,4115 ----
    if (!oldstyle_tables)
      {
        size_t nr;
+ 
+       for (nr = 0; nr < ctype->nr_charclass; nr++)
+ 	{
+ 	  ctype->class_b[nr] = (uint32_t *)
+ 	    xcalloc (256 / 32, sizeof (uint32_t));
+ 
+ 	  for (idx = 0; idx < 256; ++idx)
+ 	    if (ctype->class256_collection[idx] & _ISbit (nr))
+ 	      ctype->class_b[nr][idx >> 5] |= (uint32_t)1 << (idx & 0x1f);
+ 	}
  
        for (nr = 0; nr < ctype->nr_charclass; nr++)
  	{

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]