This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Allow optimizing locale-archive for speed or size


Hi!

The following patch adds a --compact option to localedef, so that the
locale archive can be optimized either for speed (the default) or for size
(with the --compact option).
On my test box, a locale-archive with 487 locales (almost half of them
UTF-8, i.e. big, ones) built with the --compact option (identical to what
localedef produces without this patch) was 29359882 bytes; built without
the --compact option it was 29863728 bytes, i.e. a ~1.7% size increase.
The advantage of this is that all the tiny locale files are stored
together, so glibc can do fewer mmap calls.  Here "tiny" is defined as all
but LC_COLLATE and LC_CTYPE.  LC_TIME is questionable, but for more than
half of the locales all the small ones including LC_TIME fit into less than
4096 bytes, and for the remaining ones they are only a few bytes above 4K.
E.g. running "LC_ALL=en_US.utf8 echo" on that box needed 7 mmap calls on
the locale-archive (and 122 major page faults according to the time
output), while with a locale-archive optimized for speed it needed just
3 mmaps and 117 major page faults.

2002-08-14  Jakub Jelinek  <jakub@redhat.com>

	* locale/programs/locarchive.c (add_locale): Put all the small
	locale files together if optimizing for speed.  If the small locale
	file block would occupy fewer pages when aligned to 4096 bytes,
	align it.
	(show_archive_content): Support archives optimized for speed.
	* locale/programs/localedef.c (compact_archive, OPT_COMPACT): New.
	(options): Add --compact.
	(parse_opt): Handle OPT_COMPACT.
	* locale/programs/localedef.h (compact_archive): Declare.

--- libc/locale/programs/locarchive.c.jj	Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/locarchive.c	Tue Aug 13 18:58:23 2002
@@ -575,25 +575,48 @@ add_locale (struct locarhandle *ah,
   unsigned int num_new_offsets = 0;
   struct sumhashent *sumhashtab;
   uint32_t hval;
-  unsigned int cnt;
-  unsigned int idx;
+  unsigned int cnt, idx;
   struct locarhead *head;
   struct namehashent *namehashent;
   unsigned int incr;
   struct locrecent *locrecent;
+  off64_t lastoffset;
 
   head = ah->addr;
   sumhashtab = (struct sumhashent *) ((char *) ah->addr
 				      + head->sumhash_offset);
 
+  memset (file_offsets, 0, sizeof (file_offsets));
+  data[LC_ALL].size = 0;
+  if (! compact_archive)
+    {
+      char *ptr;
+
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+	if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+	  data[LC_ALL].size += (data[cnt].size + 15) & -16;
+
+      data[LC_ALL].addr = alloca (data[LC_ALL].size);
+      memset (data[LC_ALL].addr, 0, data[LC_ALL].size);
+
+      ptr = data[LC_ALL].addr;
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+	if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+	  {
+	    memcpy (ptr, data[cnt].addr, data[cnt].size);
+	    ptr += (data[cnt].size + 15) & -16;
+	  }
+
+      __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum);
+    }
 
   /* For each locale category data set determine whether the same data
      is already somewhere in the archive.  */
   for (cnt = 0; cnt < __LC_LAST; ++cnt)
-    if (cnt != LC_ALL)
+    if ((compact_archive && cnt != LC_ALL)
+	|| (! compact_archive
+	    && (cnt == LC_ALL || cnt == LC_CTYPE || cnt == LC_COLLATE)))
       {
-	/* By default signal that we have no data.  */
-	file_offsets[cnt] = 0;
 	++num_new_offsets;
 
 	/* Compute the hash value of the checksum to determine a
@@ -638,8 +661,11 @@ add_locale (struct locarhandle *ah,
     }
 
   /* Add the locale data which is not yet in the archive.  */
-  for (cnt = 0; cnt < __LC_LAST; ++cnt)
-    if (cnt != LC_ALL && file_offsets[cnt] == 0)
+  for (cnt = 0, lastoffset = 0; cnt < __LC_LAST; ++cnt)
+    if (((compact_archive && cnt != LC_ALL)
+	 || (! compact_archive
+	     && (cnt == LC_ALL || cnt == LC_CTYPE || cnt == LC_COLLATE)))
+	&& file_offsets[cnt] == 0)
       {
 	/* The data for this section is not yet available in the
 	   archive.  Append it.  */
@@ -650,6 +676,25 @@ add_locale (struct locarhandle *ah,
 	if (lastpos == (off64_t) -1)
 	  error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
 
+	/* If optimizing for speed and block of small locales would cross
+	   4096 boundary, align it unless it follows immediately LC_CTYPE
+	   or LC_COLLATE.  */
+	if (! compact_archive && cnt == LC_ALL && lastoffset != lastpos)
+	  {
+	    if ((((lastpos & 4095) + data[cnt].size + 4095) & -4096)
+		> ((data[cnt].size + 4095) & -4096))
+	      {
+		size_t sz = 4096 - (lastpos & 4095);
+		char *zeros = alloca (sz);
+
+		memset (zeros, 0, sz);
+		if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, sz) != sz))
+		  error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+		lastpos += sz;
+	      }
+	  }
+
 	/* Align all data to a 16 byte boundary.  */
 	if ((lastpos & 15) != 0)
 	  {
@@ -664,6 +709,7 @@ add_locale (struct locarhandle *ah,
 
 	/* Remember the position.  */
 	file_offsets[cnt] = lastpos;
+	lastoffset = lastpos + data[cnt].size;
 
 	/* Write the data.  */
 	if (TEMP_FAILURE_RETRY (write (ah->fd, data[cnt].addr, data[cnt].size))
@@ -689,6 +735,17 @@ add_locale (struct locarhandle *ah,
 	++head->sumhash_used;
       }
 
+  if (! compact_archive)
+    {
+      off64_t lastpos = file_offsets[LC_ALL];
+      for (cnt = 0; cnt < __LC_LAST; ++cnt)
+	if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+	  {
+	    file_offsets[cnt] = lastpos;
+	    lastpos += (data[cnt].size + 15) & -16;
+	  }
+    }
+
   if (namehashent->name_offset == 0)
     {
       /* Add the name string.  */
@@ -730,11 +787,10 @@ add_locale (struct locarhandle *ah,
 
   /* Fill in the table with the locations of the locale data.  */
   for (cnt = 0; cnt < __LC_LAST; ++cnt)
-    if (cnt != LC_ALL)
-      {
-	locrecent->record[cnt].offset = file_offsets[cnt];
-	locrecent->record[cnt].len = data[cnt].size;
-      }
+    {
+      locrecent->record[cnt].offset = file_offsets[cnt];
+      locrecent->record[cnt].len = data[cnt].size;
+    }
 
   return namehashent->locrec_offset;
 }
@@ -1292,7 +1348,9 @@ show_archive_content (int verbose)
 	  locrec = (struct locrecent *) ((char *) ah.addr
 					 + names[cnt].locrec_offset);
 	  for (idx = 0; idx < __LC_LAST; ++idx)
-	    if (idx != LC_ALL)
+	    if (locrec->record[LC_ALL].offset != 0
+		? (idx == LC_ALL || idx == LC_CTYPE || idx == LC_COLLATE)
+		: idx != LC_ALL)
 	      {
 		struct dataent *data, dataent;
 
@@ -1318,13 +1376,19 @@ show_archive_content (int verbose)
 	      {
 		struct dataent *data, dataent;
 
-		dataent.file_offset = locrec->record[idx].offset;
+		if (idx != LC_CTYPE && idx != LC_COLLATE
+		    && locrec->record[LC_ALL].offset)
+		  dataent.file_offset = locrec->record[LC_ALL].offset;
+		else
+		  dataent.file_offset = locrec->record[idx].offset;
 		data = (struct dataent *) bsearch (&dataent, files, sumused,
 						   sizeof (struct dataent),
 						   dataentcmp);
-		printf ("%6d %7x %3d ",
+		printf ("%6d %7x %3d%c ",
 			locrec->record[idx].len, locrec->record[idx].offset,
-			data->nlink);
+			data->nlink,
+			dataent.file_offset == locrec->record[LC_ALL].offset
+			? '+' : ' ');
 		for (i = 0; i < 16; i += 4)
 		    printf ("%02x%02x%02x%02x",
 			    data->sum[i], data->sum[i + 1],
--- libc/locale/programs/localedef.c.jj	Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/localedef.c	Tue Aug 13 18:59:04 2002
@@ -97,6 +97,9 @@ static bool replace_archive;
 /* If true list archive content.  */
 static bool list_archive;
 
+/* If true make archive more compact.  */
+bool compact_archive;
+
 /* Maximum number of retries when opening the locale archive.  */
 int max_locarchive_open_retry = 10;
 
@@ -114,6 +117,7 @@ void (*argp_program_version_hook) (FILE 
 #define OPT_REPLACE 307
 #define OPT_DELETE_FROM_ARCHIVE 308
 #define OPT_LIST_ARCHIVE 309
+#define OPT_COMPACT 310
 
 /* Definitions of arguments for argp functions.  */
 static const struct argp_option options[] =
@@ -145,6 +149,7 @@ static const struct argp_option options[
   { "list-archive", OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") },
   { "alias-file", 'A', "FILE", 0,
     N_("locale.alias file to consult when making archive")},
+  { "compact", OPT_COMPACT, NULL, 0, N_("Optimize archive for space")},
   { NULL, 0, NULL, 0, NULL }
 };
 
@@ -330,6 +335,9 @@ parse_opt (int key, char *arg, struct ar
     case OPT_LIST_ARCHIVE:
       list_archive = true;
       break;
+    case OPT_COMPACT:
+      compact_archive = true;
+      break;
     case 'c':
       force_output = 1;
       break;
--- libc/locale/programs/localedef.h.jj	Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/localedef.h	Tue Aug 13 17:02:48 2002
@@ -119,7 +119,7 @@ extern const char *repertoire_global;
 extern int max_locarchive_open_retry;
 extern bool no_archive;
 extern const char *alias_file;
-
+extern bool compact_archive;
 
 /* Prototypes for a few program-wide used functions.  */
 extern void *xmalloc (size_t __n);

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]