This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH] Fix *scanf and *strto*


Hi!

This patch fixes a few bugs I found in scanf, strtol and the unused
indigits*.h headers. Included is a testcase as well.

BTW: Could we have a test locale (e.g. foo_FOO) whose indigits and
thousands separator are each multiple characters long?
Also, in the I18N fix I assumed (because indigits.h seems to do that) that
if a number starts with a digit from script N, then all the remaining digits
in the number must be from the same script. Is that a good assumption?
Otherwise I'd have to slightly modify __vfscanf.

2000-07-27  Jakub Jelinek  <jakub@redhat.com>

	* locale/indigits.h (indigit_value): Correct.
	* locale/indigitswc.h (indigitwc_value): Correct.
	* stdio-common/vfscanf.c (__vfscanf): Fix I18N number
	conversion, add GROUP checking for it, fix GROUP number
	conversion with strlen(thousands) > 1.
	Honour width correctly in the presence of floating
	decimal points and thousands separators.
	* stdio-common/tst-sscanf.c: New test.
	* stdio-common/Makefile: Add it to tests.
	* sysdeps/generic/strtol.c (strtol): Fix conversion if there are
	thousands separators and group argument is non-zero.

--- libc/locale/indigits.h.jj	Sat Jan 29 03:47:32 2000
+++ libc/locale/indigits.h	Thu Jul 27 16:05:03 2000
@@ -32,7 +32,7 @@ indigit_value (const char **s, size_t *l
   int from_level;
   int to_level;
   const char *mbdigits[10];
-  int n;
+  int i, n;
 
   if (*decided != -1)
     from_level = to_level = *decided;
@@ -53,11 +53,12 @@ indigit_value (const char **s, size_t *l
       mbdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_MB + n);
       dlen = strlen (mbdigits[n]);
 
-      if (dlen <= len && memcmp (*s, mbdigits[n], dlen) == 0)
+      if (from_level == 0 && dlen <= *len
+	  && memcmp (*s, mbdigits[n], dlen) == 0)
 	{
 	  /* Found it.  */
 	  *s += dlen;
-	  len -= dlen;
+	  *len -= dlen;
 	  if (*decided == -1)
 	    *decided = 0;
 	  return n;
@@ -68,18 +69,19 @@ indigit_value (const char **s, size_t *l
     }
 
   /* Now perform the remaining tests.  */
-  while (++from_level <= to_level)
+  for (i = 1; i <= to_level; i++)
     {
       /* Search all ten digits of this level.  */
       for (n = 0; n < 10; ++n)
 	{
 	  size_t dlen = strlen (mbdigits[n]);
 
-	  if (dlen <= len && memcmp (*s, mbdigits[n], dlen) == 0)
+	  if (i >= from_level && dlen <= *len
+	      && memcmp (*s, mbdigits[n], dlen) == 0)
 	    {
 	      /* Found it.  */
 	      *s += dlen;
-	      len -= dlen;
+	      *len -= dlen;
 	      if (*decided == -1)
 		*decided = from_level;
 	      return n;
@@ -88,9 +90,6 @@ indigit_value (const char **s, size_t *l
 	  /* Advance the pointer to the next string.  */
 	  mbdigits[n] += dlen + 1;
 	}
-
-      /* Next level.  */
-      ++from_level;
     }
 
   /* If we reach this point no matching digit was found.  */
--- libc/locale/indigitswc.h.jj	Thu Feb 17 16:09:55 2000
+++ libc/locale/indigitswc.h	Thu Jul 27 16:05:03 2000
@@ -48,6 +48,7 @@ indigitwc_value (wchar_t wc, int *decide
     {
       /* Get the string for the digits with value N.  */
       wcdigits[n] = _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+      wcdigits[n] += from_level;
 
       if (wc == *wcdigits[n])
 	{
@@ -78,9 +79,6 @@ indigitwc_value (wchar_t wc, int *decide
 	  /* Advance the pointer to the next string.  */
 	  ++wcdigits[n];
 	}
-
-      /* Next level.  */
-      ++from_level;
     }
 
   /* If we reach this point no matching digit was found.  */
--- libc/stdio-common/vfscanf.c.jj	Wed Jul 26 14:20:45 2000
+++ libc/stdio-common/vfscanf.c	Thu Jul 27 16:29:11 2000
@@ -1213,6 +1213,7 @@ __vfscanf (FILE *s, const char *format, 
 	    {
 	      int from_level;
 	      int to_level;
+	      int level;
 #ifdef COMPILE_WSCANF
 	      const wchar_t *wcdigits[10];
 #else
@@ -1229,99 +1230,196 @@ __vfscanf (FILE *s, const char *format, 
 					   _NL_CTYPE_INDIGITS_MB_LEN) - 1;
 #endif
 
-	      /* In this round we get the pointer to the digit strings
-		 and also perform the first round of comparisons.  */
-	      for (n = 0; n < 10; ++n)
+	      /* Read the number into workspace.  */
+	      while (c != EOF && width != 0)
 		{
-		  /* Get the string for the digits with value N.  */
+		  /* In this round we get the pointer to the digit strings
+		     and also perform the first round of comparisons.  */
+		  for (n = 0; n < 10; ++n)
+		    {
+		      /* Get the string for the digits with value N.  */
 #ifdef COMPILE_WSCANF
-		  wcdigits[n] = (const wchar_t *)
-		    _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
-		  if (c == *wcdigits[n])
-		    break;
+		      wcdigits[n] = (const wchar_t *)
+			_NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
+		      wcdigits[n] += from_level;
+
+		      if (c == *wcdigits[n])
+			{
+			  to_level = from_level;
+			  break;
+			}
 
-		  /* Advance the pointer to the next string.  */
-		  ++wcdigits[n];
+		      /* Advance the pointer to the next string.  */
+		      ++wcdigits[n];
 #else
-		  size_t dlen;
-		  size_t dcnt;
+		      const char *cmpp;
+		      int avail = width > 0 ? width : INT_MAX;
 
-		  mbdigits[n] = _NL_CURRENT (LC_CTYPE,
-					     _NL_CTYPE_INDIGITS0_MB + n);
-		  dlen = strlen (mbdigits[n]);
-
-		  dcnt = 0;
-		  do
-		    {
-		      if (c != mbdigits[n][dcnt])
-			break;
-		      c = inchar ();
-		    }
-		  while (--dcnt > 0);
+		      mbdigits[n] = _NL_CURRENT (LC_CTYPE,
+						 _NL_CTYPE_INDIGITS0_MB + n);
 
-		  if (dcnt == 0)
-		    /* We found it.  */
-		    break;
+		      for (level = 0; level < from_level; level++)
+			mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
+
+		      cmpp = mbdigits[n];
+		      while (*cmpp == c && avail > 0)
+			{
+			  if (*++cmpp == '\0')
+			    break;
+			  else
+			    {
+			      if ((c = inchar ()) == EOF)
+				break;
+			      --avail;
+			    }
+			}
+
+		      if (*cmpp == '\0')
+			{
+			  if (width > 0)
+			    width = avail;
+			  to_level = from_level;
+			  break;
+			}
 
-		  /* Advance the pointer to the next string.  */
-		  mbdigits[n] += dlen + 1;
+		      /* We are pushing all read characters back.  */
+		      if (cmpp > mbdigits[n])
+			{
+			  ungetc (c, s);
+			  while (--cmpp > mbdigits[n])
+			    ungetc (*cmpp, s);
+			  c = *cmpp;
+			}
+
+		      /* Advance the pointer to the next string.  */
+		      mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
 #endif
-		}
+		    }
 
-	      if (n == 10)
-		{
-		  /* Have not yet found the digit.  */
-		  while (++from_level <= to_level)
+		  if (n == 10)
 		    {
-		      /* Search all ten digits of this level.  */
-		      for (n = 0; n < 10; ++n)
+		      /* Have not yet found the digit.  */
+		      for (level = from_level + 1; level <= to_level; level++)
 			{
+			  /* Search all ten digits of this level.  */
+			  for (n = 0; n < 10; ++n)
+			    {
 #ifdef COMPILE_WSCANF
-			  if (c == *wcdigits[n])
-			    break;
+			      if (c == *wcdigits[n])
+				break;
 
-			  /* Advance the pointer to the next string.  */
-			  ++wcdigits[n];
+			      /* Advance the pointer to the next string.  */
+			      ++wcdigits[n];
 #else
-			  size_t dlen = strlen (mbdigits[n]);
-			  size_t dcnt;
+			      const char *cmpp;
+			      int avail = width > 0 ? width : INT_MAX;
+
+			      cmpp = mbdigits[n];
+			      while (*cmpp == c && avail > 0)
+				{
+				  if (*++cmpp == '\0')
+				    break;
+				  else
+				    {
+				      if ((c = inchar ()) == EOF)
+					break;
+				      --avail;
+				    }
+				}
+
+			      if (*cmpp == '\0')
+				{
+				  if (width > 0)
+				    width = avail;
+				  break;
+				}
 
-			  dcnt = 0;
-			  do
+			      /* We are pushing all read characters back.  */
+			      if (cmpp > mbdigits[n])
+				{
+				  ungetc (c, s);
+				  while (--cmpp > mbdigits[n])
+				    ungetc (*cmpp, s);
+				  c = *cmpp;
+				}
+
+			      /* Advance the pointer to the next string.  */
+			      mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
+#endif
+			    }
+
+			  if (n < 10)
 			    {
-			      if (c != mbdigits[n][dcnt])
-				break;
-			      c = inchar ();
+			      /* Found it.  */
+			      from_level = level;
+			      to_level = level;
+			      break;
 			    }
-			  while (--dcnt > 0);
+			}
+		    }
+
+		  if (n < 10)
+		    c = L_('0') + n;
+		  else if ((flags & GROUP)
+#ifdef COMPILE_WSCANF
+			   && thousands != L'\0'
+#else
+			   && thousands != NULL
+#endif
+			   )
+		    {
+		      /* Try matching against the thousands separator.  */
+#ifdef COMPILE_WSCANF
+		      if (c != thousands)
+			  break;
+#else
+		      const char *cmpp = thousands;
+		      int avail = width > 0 ? width : INT_MAX;
 
-			  if (dcnt == 0)
-			    /* We found it.  */
+		      while (*cmpp == c && avail > 0)
+			{
+			  ADDW (c);
+			  if (*++cmpp == '\0')
 			    break;
+			  else
+			    {
+			      if ((c = inchar ()) == EOF)
+				break;
+			      --avail;
+			    }
+			}
 
-			  /* Advance the pointer to the next string.  */
-			  mbdigits[n] += dlen + 1;
-#endif
+		      if (*cmpp != '\0')
+			{
+			  /* We are pushing all read characters back.  */
+			  if (cmpp > thousands)
+			    {
+			      wpsize -= cmpp - thousands;
+			      ungetc (c, s);
+			      while (--cmpp > thousands)
+				ungetc (*cmpp, s);
+			      c = *cmpp;
+			    }
+			  break;
 			}
 
-		      if (n < 10)
-			/* Found it.  */
-			break;
+		      if (width > 0)
+			width = avail;
 
-		      /* Next level.  */
-		      ++from_level;
+		      /* The last thousands character will be added back by
+			 the ADDW below.  */
+			--wpsize;
+#endif
 		    }
-		}
+		  else
+		    break;
 
-	      if (n == 10)
-		{
-		  /* Haven't found anything.  Push the last character back
-		     and return an error.  */
-		  ungetc (c, s);
-		  input_error ();
-		}
+		  ADDW (c);
+		  if (width > 0)
+		    --width;
 
-	      ADDW (L_('0') + n);
+		  c = inchar ();
+		}
 	    }
 	  else
 	    /* Read the number into workspace.  */
@@ -1351,20 +1449,24 @@ __vfscanf (FILE *s, const char *format, 
 			int avail = width > 0 ? width : INT_MAX;
 
 			while (*cmpp == c && avail > 0)
-			  if (*++cmpp == '\0')
-			    break;
-			  else
-			    {
-			      if (inchar () == EOF)
-				break;
-			      --avail;
-			    }
+			  {
+			    ADDW (c);
+			    if (*++cmpp == '\0')
+			      break;
+			    else
+			      {
+				if ((c = inchar ()) == EOF)
+				  break;
+				--avail;
+			      }
+			  }
 
 			if (*cmpp != '\0')
 			  {
-			    /* We are pushing all read character back.  */
+			    /* We are pushing all read characters back.  */
 			    if (cmpp > thousands)
 			      {
+				wpsize -= cmpp - thousands;
 				ungetc (c, s);
 				while (--cmpp > thousands)
 				  ungetc (*cmpp, s);
@@ -1372,9 +1474,13 @@ __vfscanf (FILE *s, const char *format, 
 			      }
 			    break;
 			  }
+
 			if (width > 0)
-			  /* +1 because we substract below.  */
-			  width = avail + 1;
+			  width = avail;
+
+			/* The last thousands character will be added back by
+			   the ADDW below.  */
+			--wpsize;
 #endif
 		      }
 		    else
@@ -1527,8 +1633,7 @@ __vfscanf (FILE *s, const char *format, 
 		      conv_error ();
 		    }
 		  if (width > 0)
-		    /* +1 because we substract below.  */
-		    width = avail + 1;
+		    width = avail;
 #endif
 		}
 	      if (width > 0)
@@ -1689,8 +1794,7 @@ __vfscanf (FILE *s, const char *format, 
 		      for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
 			ADDW (*cmpp);
 		      if (width > 0)
-			/* +1 because we substract below.  */
-			width = avail + 1;
+			width = avail;
 		      got_dot = 1;
 		    }
 		  else
@@ -1727,8 +1831,7 @@ __vfscanf (FILE *s, const char *format, 
 			  for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
 			    ADDW (*cmpp);
 			  if (width > 0)
-			    /* +1 because we substract below.  */
-			    width = avail + 1;
+			    width = avail;
 			}
 		      else
 			{
--- libc/stdio-common/Makefile.jj	Thu Jul 27 15:59:43 2000
+++ libc/stdio-common/Makefile	Thu Jul 27 16:32:24 2000
@@ -54,7 +54,7 @@ tests := tstscanf test_rdwr test-popen t
 	 bug1 bug2 bug3 bug4 bug5 bug6 bug7 bug8 bug9 bug10 bug11 bug12 bug13 \
 	 tfformat tiformat tllformat tstdiomisc tst-printfsz tst-wc-printf \
 	 scanf1 scanf2 scanf3 scanf4 scanf5 scanf7 scanf8 scanf9 scanf10 \
-	 scanf12 tst-tmpnam tst-cookie tst-obprintf
+	 scanf12 tst-tmpnam tst-cookie tst-obprintf tst-sscanf
 
 test-srcs = tst-unbputc tst-printf
 
--- libc/stdio-common/tst-sscanf.c.jj	Thu Jul 27 16:05:03 2000
+++ libc/stdio-common/tst-sscanf.c	Thu Jul 27 16:33:39 2000
@@ -0,0 +1,114 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+                                       
+#include <stdlib.h>
+#include <stdio.h>
+#include <locale.h>
+
+const char *str_double[] = {
+  "-.10000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01",
+  "0.10000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01",
+  "-1234567E0198765432E0912345678901987654321091234567890198765432109",
+  "-0.1000E+020.20000E+020.25000E+010.40000E+010.50000E+010.12500E+01"
+};
+
+double val_double[] = {
+  -.10000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01,
+  0.10000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01,
+  -1234567E01, 98765432E09, 12345678901, 98765432109, 12345678901, 98765432109,
+  -0.1000E+02, 0.20000E+02, 0.25000E+01, 0.40000E+01, 0.50000E+01, 0.12500E+01
+};
+
+const char *str_long[] = {
+  "-12345678987654321123456789987654321123456789987654321",
+  "-12345678987654321123456789987654321123456789987654321",
+  "-12,345,678987,654,321123,456,789987,654,321123,456,789987,654,321",
+  "-12,345,678987,654,321123,456,789987,654,321123,456,789987,654,321"
+};
+
+const char *fmt_long[] = {
+  "%9ld%9ld%9ld%9ld%9ld%9ld",
+  "%I9ld%I9ld%I9ld%I9ld%I9ld%I9ld",
+  "%'11ld%'11ld%'11ld%'11ld%'11ld%'11ld",
+  "%I'11ld%I'11ld%I'11ld%I'11ld%I'11ld%I'11ld"
+};
+
+long val_long[] = {
+  -12345678, 987654321, 123456789, 987654321, 123456789, 987654321
+};
+
+int main()
+{
+  double d[6];
+  long l[6];
+  int i, j;
+  int tst_locale;
+  int result = 0;
+
+  tst_locale = 1;
+  if (tst_locale)
+    if (setlocale(LC_ALL, "en_US") == NULL)
+      {
+	printf ("Failed to set en_US locale, skipping locale related tests\n");
+	tst_locale = 0;
+      }
+
+  for (i = 0; i < 4; i++)
+    {
+      if (sscanf(str_double[i], "%11lf%11lf%11lf%11lf%11lf%11lf",
+		 &d[0], &d[1], &d[2], &d[3], &d[4], &d[5]) != 6)
+	{
+	  printf ("Double sscanf test %d wrong number of "
+		  "assigned inputs\n", i);
+	  result = 1;
+	}
+      else
+	for (j = 0; j < 6; j++)
+	  if (d[j] != val_double[6 * i + j])
+	    {
+	      printf ("Double sscanf test %d failed\n", i);
+	      result = 1;
+	      break;
+	    }
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      if (sscanf(str_long[i], fmt_long[i],
+		 &l[0], &l[1], &l[2], &l[3], &l[4], &l[5]) != 6)
+	{
+	  printf ("Integer sscanf test %d wrong number of "
+		  "assigned inputs\n", i);
+	  result = 1;
+	}
+      else
+	for (j = 0; j < 6; j++)
+	  if (l[j] != val_long[j])
+	    {
+	      printf ("Integer sscanf test %d failed\n", i);
+	      result = 1;
+	      break;
+	    }
+
+      if (! tst_locale)
+	break;
+    }
+  exit (result);
+}
--- libc/sysdeps/generic/strtol.c.jj	Wed Jul 26 14:20:45 2000
+++ libc/sysdeps/generic/strtol.c	Thu Jul 27 16:05:03 2000
@@ -256,7 +256,9 @@ INTERNAL (strtol) (nptr, endptr, base, g
   wchar_t thousands = L'\0';
 # else
   const char *thousands = NULL;
+  size_t thousands_len = 0;
 # endif
+
   /* The numeric grouping specification of the current locale,
      in the format described in <locale.h>.  */
   const char *grouping;
@@ -338,18 +340,25 @@ INTERNAL (strtol) (nptr, endptr, base, g
   save = s;
 
 #ifdef USE_NUMBER_GROUPING
-  if (group)
+  if (base != 10)
+    grouping = NULL;
+
+  if (grouping)
     {
+# ifndef USE_WIDE_CHAR
+      thousands_len = strlen (thousands);
+# endif
+
       /* Find the end of the digit string and check its grouping.  */
       end = s;
       if (
 # ifdef USE_WIDE_CHAR
 	  *s != thousands
 # else
-	  ({ for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
+	  ({ for (cnt = 0; cnt < thousands_len; ++cnt)
 	       if (thousands[cnt] != end[cnt])
 		 break;
-	     thousands[cnt] != '\0'; })
+	     cnt < thousands_len; })
 # endif
 	  )
 	{
@@ -358,10 +367,10 @@ INTERNAL (strtol) (nptr, endptr, base, g
 # ifdef USE_WIDE_CHAR
 		&& c != thousands
 # else
-		&& ({ for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
+		&& ({ for (cnt = 0; cnt < thousands_len; ++cnt)
 		      if (thousands[cnt] != end[cnt])
 			break;
-		      thousands[cnt] != '\0'; })
+		      cnt < thousands_len; })
 # endif
 		&& (!ISALPHA (c)
 		    || (int) (TOUPPER (c) - L_('A') + 10) >= base))
@@ -391,6 +400,28 @@ INTERNAL (strtol) (nptr, endptr, base, g
 	    break;
 	  if (c >= L_('0') && c <= L_('9'))
 	    c -= L_('0');
+#ifdef USE_NUMBER_GROUPING
+# ifdef USE_WIDE_CHAR
+	  else if (grouping && c == thousands)
+	    continue;
+# else
+	  else if (thousands_len)
+	    {
+	      for (cnt = 0; cnt < thousands_len; ++cnt)
+		if (thousands[cnt] != s[cnt])
+		  break;
+	      if (cnt == thousands_len)
+		{
+		  s += thousands_len - 1;
+		  continue;
+		}
+	      if (ISALPHA (c))
+		c = TOUPPER (c) - L_('A') + 10;
+	      else
+		break;
+	    }
+# endif
+#endif
 	  else if (ISALPHA (c))
 	    c = TOUPPER (c) - L_('A') + 10;
 	  else
@@ -417,6 +448,28 @@ INTERNAL (strtol) (nptr, endptr, base, g
 	  break;
 	if (c >= L_('0') && c <= L_('9'))
 	  c -= L_('0');
+#ifdef USE_NUMBER_GROUPING
+# ifdef USE_WIDE_CHAR
+	else if (grouping && c == thousands)
+	  continue;
+# else
+	else if (thousands_len)
+	  {
+	    for (cnt = 0; cnt < thousands_len; ++cnt)
+	      if (thousands[cnt] != s[cnt])
+		break;
+	    if (cnt == thousands_len)
+	      {
+		s += thousands_len - 1;
+		continue;
+	      }
+	    if (ISALPHA (c))
+	      c = TOUPPER (c) - L_('A') + 10;
+	    else
+	      break;
+	  }
+# endif
+#endif
 	else if (ISALPHA (c))
 	  c = TOUPPER (c) - L_('A') + 10;
 	else

	Jakub

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]