On Feb 20 11:14, Corinna Vinschen wrote:
There is a problem with this patch. The code path you have made changes
to applies when locale is set to "C" or "". In the case of "C" the old code
should still be in place (i.e. if !strcmp(locale, "C")). A check is needed
for when !strcmp(locale, ""). If you make that fix, it should be fine.
Another problem exists with the current code. The return value from
LC_ALL should be a concatenation of the various locale settings separated
by a special character (e.g. ':'). The LC_ALL category needs to check if
that is the form of the input string given and separate them out and call
for each category. This way, the original settings can be restored on a
subsequent call to setlocale() with the string given back from LC_ALL.
This form only applies to LC_ALL and is not valid input for any other
category.
There's more broken in setlocale. For instance, if locale is "C" or
"", the variable locale_name is set to "C". But afterwards, the tests
are still using locale instead of locale_name. And worse, locale[1]
is tested, even though locale could be "" at this point. It also
just occurred to me that the current code disallows *any* other setting
of LC_ALL except for "C" or "". I'll rework the function a bit. Stay
tuned.
Ok, here's my new setlocale implementation. It fixes the following
problems:
- Make the static locale buffers bigger (16 instead of 12 bytes). The
reason is that the longest currently supported locale, "C-ISO-8859-1",
has a strlen of 12 bytes, which leaves no room for the terminating NUL
in a 12-byte buffer. Uh oh...
- Fix the potential access of a byte beyond the incoming locale string
in case the locale string is "".
- Don't return the *previous* locale setting of the category, rather
return the *current* locale setting, as per POSIX. Consequently
remove the last_lc_ctype and last_lc_messages variables.
- Per POSIX allow the required "POSIX" locale. Map it to the "C" locale
as on Linux.
- If locale is "", honor the environment in the order required by POSIX
for all supported categories.
- If category is LC_ALL, return a colon-separated list of the current
settings of all supported categories.
- If category is LC_ALL, check if the incoming locale contains a colon.
If so, use the input to set all supported categories accordingly.
Corinna
* libc/locale/locale.c: Fix documentation.
(__lc_ctype): Raise size to 16 bytes.
(_setlocale_r): Allow "POSIX" locale and map to "C" locale.
Raise size of lc_messages to 16 bytes. Add static lc_all
string array. Handle LC_ALL string according to POSIX.
If locale is the empty string, read the locale settings from
the environment using POSIX rules.
Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.8
diff -u -p -r1.8 locale.c
--- libc/locale/locale.c 23 Apr 2004 21:44:21 -0000 1.8
+++ libc/locale/locale.c 20 Feb 2009 12:07:41 -0000
@@ -42,13 +42,13 @@ execution environment for international
information; <<localeconv>> reports on the settings of the current
locale.
-This is a minimal implementation, supporting only the required <<"C">>
-value for <[locale]>; strings representing other locales are not
-honored unless _MB_CAPABLE is defined in which case three new
-extensions are allowed for LC_CTYPE or LC_MESSAGES only: <<"C-JIS">>,
-<<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>. (<<"">> is
-also accepted; it represents the default locale
-for an implementation, here equivalent to <<"C">>.)
+This is a minimal implementation, supporting only the required <<"POSIX">>
+and <<"C">> values for <[locale]>; strings representing other locales are not
+honored unless _MB_CAPABLE is defined in which case five extensions
+are allowed for LC_ALL, LC_CTYPE or LC_MESSAGES only: <<"C-UTF-8">>,
+<<"C-JIS">>, <<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>. (<<"">> is
+also accepted; if given, the settings are read from the corresponding
+LC_* environment variables and $LANG.)
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
a pointer to the string representing the current locale (always
@@ -66,9 +66,11 @@ in effect.
<[reent]> is a pointer to a reentrancy structure.
RETURNS
-<<setlocale>> returns either a pointer to a string naming the locale
-currently in effect (always <<"C">> for this implementation, or, if
-the locale request cannot be honored, <<NULL>>.
+A successful call to <<setlocale>> returns a pointer to a string
+naming the locale currently in effect. The string returned by
+<<setlocale>> is such that a subsequent call using that string will
+restore that category (or all categories in case of LC_ALL), to that
+state. On error, <<setlocale>> returns <<NULL>>.
<<localeconv>> returns a pointer to a structure of type <<lconv>>,
which describes the formatting and collating conventions in effect (in
@@ -91,6 +93,7 @@ No supporting OS subroutines are require
#include <string.h>
#include <limits.h>
#include <reent.h>
+#include <stdlib.h>
#ifdef __CYGWIN__
int __declspec(dllexport) __mb_cur_max = 1;
@@ -113,7 +116,7 @@ static _CONST struct lconv lconv =
char * _EXFUN(__locale_charset,(_VOID));
static char *charset = "ISO-8859-1";
-char __lc_ctype[12] = "C";
+char __lc_ctype[16] = "C";
char *
_DEFUN(_setlocale_r, (p, category, locale),
@@ -124,33 +127,57 @@ _DEFUN(_setlocale_r, (p, category, local
#ifndef _MB_CAPABLE
if (locale)
{
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
+ if (strcmp (locale, "POSIX") && strcmp (locale, "C")
+ && strcmp (locale, ""))
+ return NULL;
p->_current_category = category;
p->_current_locale = locale;
}
return "C";
#else
- static char last_lc_ctype[12] = "C";
- static char lc_messages[12] = "C";
- static char last_lc_messages[12] = "C";
+ static char lc_messages[16] = "C";
+ static char lc_all[32] = "C:C";
if (locale)
{
char *locale_name = (char *)locale;
if (category != LC_CTYPE && category != LC_MESSAGES)
- {
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
- if (category == LC_ALL)
- {
- strcpy (last_lc_ctype, __lc_ctype);
- strcpy (__lc_ctype, "C");
- strcpy (last_lc_messages, lc_messages);
- strcpy (lc_messages, "C");
- __mb_cur_max = 1;
- }
- }
+ {
+ if (category != LC_ALL)
+ {
+ if (strcmp (locale, "POSIX") && strcmp (locale, "C")
+ && strcmp (locale, ""))
+ return NULL;
+ }
+ else
+ {
+ char *colon, *ret;
+ if ((colon = strchr (locale_name, ':')))
+ {
+ /* Too long, probably invalid anyway. */
+ if (strlen (locale_name) > 31)
+ return NULL;
+ /* Use lc_all as temporary storage, if locale
+ isn't a pointer to lc_all anyway. */
+ if (locale_name != lc_all)
+ strcpy (lc_all, locale_name);
+ colon = strchr (lc_all, ':');
+ *colon++ = '\0';
+ ret = _setlocale_r (p, LC_CTYPE, lc_all);
+ if (ret)
+ _setlocale_r (p, LC_MESSAGES, colon);
+ }
+ else
+ {
+ ret = _setlocale_r (p, LC_CTYPE, locale_name);
+ if (ret)
+ _setlocale_r (p, LC_MESSAGES, locale_name);
+ }
+ stpcpy (stpcpy (stpcpy (lc_all, __lc_ctype), ":"),
+ lc_messages);
+ return lc_all;
+ }
+ }
else
{
if (locale[0] == 'C' && locale[1] == '-')
@@ -181,22 +208,36 @@ _DEFUN(_setlocale_r, (p, category, local
return 0;
}
}
- else
- {
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
- locale_name = "C"; /* C is always the default locale */
- }
-
+ else if (!locale[0])
+ {
+ /* Per POSIX always check LC_ALL first, then the actual
+ locale category, then LANG. */
+ if ((locale_name = _getenv_r (p, "LC_ALL")))
+ ;
+ else if (category == LC_CTYPE
+ && (locale_name = _getenv_r (p, "LC_CTYPE")))
+ ;
+ else if (category == LC_MESSAGES
+ && (locale_name = _getenv_r (p, "LC_MESSAGES")))
+ ;
+ else if ((locale_name = _getenv_r (p, "LANG"))
+ && (locale_name = strchr (locale_name, '.')))
+ ;
+ else
+ locale_name = "C";
+ }
+ else if (!strcmp (locale, "POSIX"))
+ locale_name = "C";
+ else if (strcmp (locale, "C"))
+ return 0;
if (category == LC_CTYPE)
{
- strcpy (last_lc_ctype, __lc_ctype);
strcpy (__lc_ctype, locale_name);
__mb_cur_max = 1;
- if (locale[1] == '-')
+ if (locale_name[1] == '-')
{
- switch (locale[2])
+ switch (locale_name[2])
{
case 'U':
__mb_cur_max = 6;
@@ -218,13 +259,12 @@ _DEFUN(_setlocale_r, (p, category, local
}
else
{
- strcpy (last_lc_messages, lc_messages);
strcpy (lc_messages, locale_name);
charset = "ISO-8859-1";
- if (locale[1] == '-')
+ if (locale_name[1] == '-')
{
- switch (locale[2])
+ switch (locale_name[2])
{
case 'U':
charset = "UTF-8";
@@ -248,12 +288,12 @@ _DEFUN(_setlocale_r, (p, category, local
}
}
p->_current_category = category;
- p->_current_locale = locale;
+ p->_current_locale = locale_name;
if (category == LC_CTYPE)
- return last_lc_ctype;
+ return __lc_ctype;
else if (category == LC_MESSAGES)
- return last_lc_messages;
+ return lc_messages;
}
else
{