This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] Allow optimizing locale-archive for speed or size
- From: Jakub Jelinek <jakub at redhat dot com>
- To: roland at redhat dot com, drepper at redhat dot com
- Cc: libc-alpha at sources dot redhat dot com
- Date: Tue, 13 Aug 2002 19:28:03 -0400
- Subject: [PATCH] Allow optimizing locale-archive for speed or size
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The following patch adds a --compact option to localedef and differentiates
between optimizing the archive for speed (the default) and for size (with
the --compact option).
On my test box, a locale-archive with 487 locales (almost half of them
UTF-8, i.e. big, ones) built with the --compact option (identical to what
localedef produces without this patch) was 29359882 bytes; built without
--compact it was 29863728 bytes, i.e. a ~1.7% size increase.
The advantage of this is that all the tiny locale files are stored together,
so glibc can do fewer mmap calls. Here "tiny" is defined as every category
except LC_COLLATE and LC_CTYPE. LC_TIME is questionable, but for more than
half of the locales all the small files, including LC_TIME, fit into less
than 4096 bytes, and for the remaining locales they are only a few bytes
above 4K.
For example, running "LC_ALL=en_US.utf8 echo" on that box had to use 7 mmap
calls on the locale-archive (and showed 122 major page faults in the time
output), while with a locale-archive optimized for speed it used just 3
mmaps and showed 117 major page faults.
2002-08-14 Jakub Jelinek <jakub@redhat.com>
* locale/programs/locarchive.c (add_locale): Put all the small
locale files together if optimizing for speed.  If the small locale
file block would occupy fewer pages when aligned to 4096 bytes, align it.
(show_archive_content): Support archives optimized for speed.
* locale/programs/localedef.c (compact_archive, OPT_COMPACT): New.
(options): Add --compact.
(parse_opt): Handle OPT_COMPACT.
* locale/programs/localedef.h (compact_archive): Declare.
--- libc/locale/programs/locarchive.c.jj Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/locarchive.c Tue Aug 13 18:58:23 2002
@@ -575,25 +575,48 @@ add_locale (struct locarhandle *ah,
unsigned int num_new_offsets = 0;
struct sumhashent *sumhashtab;
uint32_t hval;
- unsigned int cnt;
- unsigned int idx;
+ unsigned int cnt, idx;
struct locarhead *head;
struct namehashent *namehashent;
unsigned int incr;
struct locrecent *locrecent;
+ off64_t lastoffset;
head = ah->addr;
sumhashtab = (struct sumhashent *) ((char *) ah->addr
+ head->sumhash_offset);
+ memset (file_offsets, 0, sizeof (file_offsets));
+ data[LC_ALL].size = 0;
+ if (! compact_archive)
+ {
+ char *ptr;
+
+ for (cnt = 0; cnt < __LC_LAST; ++cnt)
+ if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+ data[LC_ALL].size += (data[cnt].size + 15) & -16;
+
+ data[LC_ALL].addr = alloca (data[LC_ALL].size);
+ memset (data[LC_ALL].addr, 0, data[LC_ALL].size);
+
+ ptr = data[LC_ALL].addr;
+ for (cnt = 0; cnt < __LC_LAST; ++cnt)
+ if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+ {
+ memcpy (ptr, data[cnt].addr, data[cnt].size);
+ ptr += (data[cnt].size + 15) & -16;
+ }
+
+ __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum);
+ }
/* For each locale category data set determine whether the same data
is already somewhere in the archive. */
for (cnt = 0; cnt < __LC_LAST; ++cnt)
- if (cnt != LC_ALL)
+ if ((compact_archive && cnt != LC_ALL)
+ || (! compact_archive
+ && (cnt == LC_ALL || cnt == LC_CTYPE || cnt == LC_COLLATE)))
{
- /* By default signal that we have no data. */
- file_offsets[cnt] = 0;
++num_new_offsets;
/* Compute the hash value of the checksum to determine a
@@ -638,8 +661,11 @@ add_locale (struct locarhandle *ah,
}
/* Add the locale data which is not yet in the archive. */
- for (cnt = 0; cnt < __LC_LAST; ++cnt)
- if (cnt != LC_ALL && file_offsets[cnt] == 0)
+ for (cnt = 0, lastoffset = 0; cnt < __LC_LAST; ++cnt)
+ if (((compact_archive && cnt != LC_ALL)
+ || (! compact_archive
+ && (cnt == LC_ALL || cnt == LC_CTYPE || cnt == LC_COLLATE)))
+ && file_offsets[cnt] == 0)
{
/* The data for this section is not yet available in the
archive. Append it. */
@@ -650,6 +676,25 @@ add_locale (struct locarhandle *ah,
if (lastpos == (off64_t) -1)
error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+ /* If optimizing for speed and block of small locales would cross
+ 4096 boundary, align it unless it follows immediately LC_CTYPE
+ or LC_COLLATE. */
+ if (! compact_archive && cnt == LC_ALL && lastoffset != lastpos)
+ {
+ if ((((lastpos & 4095) + data[cnt].size + 4095) & -4096)
+ > ((data[cnt].size + 4095) & -4096))
+ {
+ size_t sz = 4096 - (lastpos & 4095);
+ char *zeros = alloca (sz);
+
+ memset (zeros, 0, sz);
+ if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, sz) != sz))
+ error (EXIT_FAILURE, errno, _("cannot add to locale archive"));
+
+ lastpos += sz;
+ }
+ }
+
/* Align all data to a 16 byte boundary. */
if ((lastpos & 15) != 0)
{
@@ -664,6 +709,7 @@ add_locale (struct locarhandle *ah,
/* Remember the position. */
file_offsets[cnt] = lastpos;
+ lastoffset = lastpos + data[cnt].size;
/* Write the data. */
if (TEMP_FAILURE_RETRY (write (ah->fd, data[cnt].addr, data[cnt].size))
@@ -689,6 +735,17 @@ add_locale (struct locarhandle *ah,
++head->sumhash_used;
}
+ if (! compact_archive)
+ {
+ off64_t lastpos = file_offsets[LC_ALL];
+ for (cnt = 0; cnt < __LC_LAST; ++cnt)
+ if (cnt != LC_ALL && cnt != LC_CTYPE && cnt != LC_COLLATE)
+ {
+ file_offsets[cnt] = lastpos;
+ lastpos += (data[cnt].size + 15) & -16;
+ }
+ }
+
if (namehashent->name_offset == 0)
{
/* Add the name string. */
@@ -730,11 +787,10 @@ add_locale (struct locarhandle *ah,
/* Fill in the table with the locations of the locale data. */
for (cnt = 0; cnt < __LC_LAST; ++cnt)
- if (cnt != LC_ALL)
- {
- locrecent->record[cnt].offset = file_offsets[cnt];
- locrecent->record[cnt].len = data[cnt].size;
- }
+ {
+ locrecent->record[cnt].offset = file_offsets[cnt];
+ locrecent->record[cnt].len = data[cnt].size;
+ }
return namehashent->locrec_offset;
}
@@ -1292,7 +1348,9 @@ show_archive_content (int verbose)
locrec = (struct locrecent *) ((char *) ah.addr
+ names[cnt].locrec_offset);
for (idx = 0; idx < __LC_LAST; ++idx)
- if (idx != LC_ALL)
+ if (locrec->record[LC_ALL].offset != 0
+ ? (idx == LC_ALL || idx == LC_CTYPE || idx == LC_COLLATE)
+ : idx != LC_ALL)
{
struct dataent *data, dataent;
@@ -1318,13 +1376,19 @@ show_archive_content (int verbose)
{
struct dataent *data, dataent;
- dataent.file_offset = locrec->record[idx].offset;
+ if (idx != LC_CTYPE && idx != LC_COLLATE
+ && locrec->record[LC_ALL].offset)
+ dataent.file_offset = locrec->record[LC_ALL].offset;
+ else
+ dataent.file_offset = locrec->record[idx].offset;
data = (struct dataent *) bsearch (&dataent, files, sumused,
sizeof (struct dataent),
dataentcmp);
- printf ("%6d %7x %3d ",
+ printf ("%6d %7x %3d%c ",
locrec->record[idx].len, locrec->record[idx].offset,
- data->nlink);
+ data->nlink,
+ dataent.file_offset == locrec->record[LC_ALL].offset
+ ? '+' : ' ');
for (i = 0; i < 16; i += 4)
printf ("%02x%02x%02x%02x",
data->sum[i], data->sum[i + 1],
--- libc/locale/programs/localedef.c.jj Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/localedef.c Tue Aug 13 18:59:04 2002
@@ -97,6 +97,9 @@ static bool replace_archive;
/* If true list archive content. */
static bool list_archive;
+/* If true make archive more compact. */
+bool compact_archive;
+
/* Maximum number of retries when opening the locale archive. */
int max_locarchive_open_retry = 10;
@@ -114,6 +117,7 @@ void (*argp_program_version_hook) (FILE
#define OPT_REPLACE 307
#define OPT_DELETE_FROM_ARCHIVE 308
#define OPT_LIST_ARCHIVE 309
+#define OPT_COMPACT 310
/* Definitions of arguments for argp functions. */
static const struct argp_option options[] =
@@ -145,6 +149,7 @@ static const struct argp_option options[
{ "list-archive", OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") },
{ "alias-file", 'A', "FILE", 0,
N_("locale.alias file to consult when making archive")},
+ { "compact", OPT_COMPACT, NULL, 0, N_("Optimize archive for space")},
{ NULL, 0, NULL, 0, NULL }
};
@@ -330,6 +335,9 @@ parse_opt (int key, char *arg, struct ar
case OPT_LIST_ARCHIVE:
list_archive = true;
break;
+ case OPT_COMPACT:
+ compact_archive = true;
+ break;
case 'c':
force_output = 1;
break;
--- libc/locale/programs/localedef.h.jj Tue Aug 13 02:01:51 2002
+++ libc/locale/programs/localedef.h Tue Aug 13 17:02:48 2002
@@ -119,7 +119,7 @@ extern const char *repertoire_global;
extern int max_locarchive_open_retry;
extern bool no_archive;
extern const char *alias_file;
-
+extern bool compact_archive;
/* Prototypes for a few program-wide used functions. */
extern void *xmalloc (size_t __n);
Jakub