This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
The following patch adds support for reading localized digits in floating-point numbers. It has almost no penalty when the 'I' flag is not used or when the needed map is not defined in the locale file. It also handles the situation where the decimal point or thousands separator is not defined in the locale file. Attached is the patch with a test case. Hamed 2005-06-28 Hamed Malek <hamed@bamdad.org> * stdio-common/vfscanf.c: Add support for reading localized digits in floating-point numbers.
Index: libc/stdio-common/vfscanf.c =================================================================== RCS file: /cvs/glibc/libc/stdio-common/vfscanf.c,v retrieving revision 1.113 diff -u -r1.113 vfscanf.c --- libc/stdio-common/vfscanf.c 26 Apr 2005 20:52:22 -0000 1.113 +++ libc/stdio-common/vfscanf.c 28 Jun 2005 12:22:37 -0000 @@ -1602,6 +1602,53 @@ if (c == EOF) input_error (); + /* "to_inpunct" is a map from ASCII digits to their + equivalent in locale. This is defined for locales + which use an extra digit set. */ + wctrans_t map = __wctrans ("to_inpunct"); + wint_t wcdigit[12]; +#ifndef COMPILE_WSCANF + char *mbdigits[12]; +#endif + if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0)) + { + int n; + + for (n = 0; n < 12; ++n) + { + if (n < 10) + wcdigit[n] = __towctrans (L'0' + n, map); + else if (n == 10) + wcdigit[n] = __towctrans (L'.', map); + else + wcdigit[n] = __towctrans (L',', map); + +#ifndef COMPILE_WSCANF + mbdigits[n] = (char *) alloca (MB_LEN_MAX * sizeof (char)); + + mbstate_t state; + memset (&state, '\0', sizeof (state)); + + size_t mblen; + if ((mblen = __wcrtomb (mbdigits[n], wcdigit[n], &state)) == (size_t) -1) + { + if (n == 10) + memcpy (mbdigits[n], decimal, strlen (decimal) + 1); + else if (n == 11) + memcpy (mbdigits[n], thousands, strlen (thousands) + 1); + else + { + /* Ignore checking against localized digits. */ + map = NULL; + break; + } + } + else + mbdigits[n][mblen] = '\0'; +#endif + } + } + got_dot = got_e = 0; /* Check for a sign. */ @@ -1617,9 +1664,12 @@ #ifdef COMPILE_WSCANF if (c != decimal) { - /* This is no valid number. */ - ungetc (c, s); - conv_error (); + if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1)) + { + /* This is no valid number. */ + ungetc (c, s); + conv_error (); + } } #else /* Match against the decimal point. 
At this point @@ -1651,7 +1701,10 @@ c = (unsigned char) *--cmpp; } - conv_error (); + if (((flags & I18N) == 0) || __builtin_expect (map == NULL, 1)) + conv_error (); + else + c = inchar (); } else { @@ -1882,6 +1935,128 @@ } while (width != 0 && inchar () != EOF); + /* We may have localized digits, if we have not read + any character or have just read decimal point. */ + + if (((flags & I18N) != 0) && __builtin_expect (map != NULL, 0)) + { +#ifdef COMPILE_WSCANF + if (wpsize == 0 || (got_dot && wpsize == 1 && wcdigit[10] == decimal)) +#else + if (wpsize == 0 || (got_dot && wpsize == strlen (decimal) && + strcmp (decimal, mbdigits[10]) == 0)) +#endif + { + while (width != 0 && inchar () != EOF) + { + if (got_e && wp[wpsize - 1] == exp_char + && (c == L_('-') || c == L_('+'))) + ADDW (c); + else if (wpsize > 0 && !got_e + && (CHAR_T) TOLOWER (c) == exp_char) + { + ADDW (exp_char); + got_e = got_dot = 1; + } + else + { + /* Checking against localized digits plus + decimal point and thousands-sep. */ + int n; + for (n = 0; n < 12; ++n) + { +#ifdef COMPILE_WSCANF + if (c == wcdigit[n]) + { + if (n < 10) + ADDW (L_('0') + n); + else if (n == 10 && !got_dot) + { + ADDW (decimal); + got_dot = 1; + } + else if (n == 11 && (flags & GROUP) != 0 + && thousands != L'\0' && ! got_dot) + ADDW (thousands); + else + { + /* The last read character is not part of the number + anymore. */ + n = 12; + } + break; + } +#else + const char *cmpp = mbdigits[n]; + int avail = width > 0 ? width : INT_MAX; + + while ((unsigned char) *cmpp == c && avail > 0) + { + if (*++cmpp == '\0') + break; + else + { + if (inchar () == EOF) + break; + --avail; + } + } + + if (*cmpp == '\0') + { + if (width > 0) + width = avail; + + if (n < 10) + ADDW (L_('0') + n); + else if (n == 10 && !got_dot) + { + /* Add all the characters. */ + for (cmpp = decimal; *cmpp != '\0'; ++cmpp) + ADDW ((unsigned char) *cmpp); + + got_dot = 1; + } + else if (n == 11 && (flags & GROUP) != 0 + && thousands != NULL && ! 
got_dot) + { + /* Add all the characters. */ + for (cmpp = thousands; *cmpp != '\0'; ++cmpp) + ADDW ((unsigned char) *cmpp); + } + else + { + /* The last read character is not part of the number + anymore. */ + n = 12; + } + break; + } + + /* We are pushing all read characters back. */ + if (cmpp > mbdigits[n]) + { + ungetc (c, s); + while (--cmpp > mbdigits[n]) + ungetc_not_eof ((unsigned char) *cmpp, s); + c = (unsigned char) *cmpp; + } +#endif + } + if (n == 12) + { + /* The last read character is not part of the number + anymore. */ + ungetc (c, s); + break; + } + } + if (width > 0) + --width; + } + } + } + /* Have we read any character? If we try to read a number in hexadecimal notation and we have read only the `0x' prefix or no exponent this is an error. */
#include <stdio.h>
#include <locale.h>
#include <assert.h>

/* UTF-8 encodings of U+06F0 .. U+06F9, the digits 0-9 as written in the
   test inputs below.  */
#define P0 "\xDB\xB0"
#define P1 "\xDB\xB1"
#define P2 "\xDB\xB2"
#define P3 "\xDB\xB3"
#define P4 "\xDB\xB4"
#define P5 "\xDB\xB5"
#define P6 "\xDB\xB6"
#define P7 "\xDB\xB7"
#define P8 "\xDB\xB8"
#define P9 "\xDB\xB9"
/* UTF-8 encoding of U+066B, used as the decimal point in the inputs.  */
#define PD "\xd9\xab"
/* UTF-8 encoding of U+066C, used as the thousands separator.  */
#define PT "\xd9\xac"

/* Number of checks that failed; drives the exit status of main.  */
static int failures;

/* Scan a single float from S using FORMAT and compare it with N.

   A diagnostic is printed and the failure counter is bumped when
   sscanf does not convert exactly one item or when the converted
   value differs from N.  The comparison is exact on purpose: both
   sides are the float nearest to the same decimal literal.  */
void
check_sscanf (const char *s, const char *format, const float n)
{
  /* Initialised so that a failed conversion never leads to reading an
     indeterminate value.  */
  float f = 0.0f;
  int matched = sscanf (s, format, &f);

  if (matched != 1)
    {
      printf ("sscanf returned %d for format \"%s\", expected 1\n",
              matched, format);
      ++failures;
      return;
    }

  if (f != n)
    {
      printf ("got %f expected %f \n", f, n);
      ++failures;
    }
}

/* Run the localized-digit scanf checks in the fa_IR locale.
   Returns 0 when every check passes and 1 otherwise.  */
int
main (void)
{
  /* Without the locale the 'I' flag has nothing to map, so every
     check below would be meaningless; report it explicitly.  */
  if (setlocale (LC_ALL, "fa_IR") == NULL)
    {
      printf ("cannot set locale \"fa_IR\"\n");
      return 1;
    }

  check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
  check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
  check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
  check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
  check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
  check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);

  return failures != 0;
}
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |