This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
bug in mbrtowc
- To: libc-alpha at sources dot redhat dot com
- Subject: bug in mbrtowc
- From: Bruno Haible <haible at ilog dot fr>
- Date: Tue, 22 May 2001 19:05:19 +0200 (CEST)
ISO C 99 says that
mbrtowc(NULL, NULL, 0, ps)
is equivalent to
mbrtowc(NULL, "", 1, ps)
but in glibc they behave differently. Below is a test, where test1() succeeds
and test2() fails, and a fix.
================================ foo.c =======================================
#include <assert.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
/* Feed a UTF-8 sequence to mbrtowc() one byte at a time and check how an
   explicit NUL byte (s = "", n = 1) is treated afterwards: it must be
   accepted (return 0, state initial) only when no partial character is
   pending, and rejected with (size_t) -1 otherwise.

   The caller must have selected a UTF-8 LC_CTYPE locale beforehand.
   Each mbrtowc() result is stored before being asserted so that the calls
   are still executed when the program is built with NDEBUG.  */
void test1 (void)
{
  wchar_t wc = 42; /* arbitrary number; only serves as an output slot */
  mbstate_t s;
  size_t ret;

  /* NUL byte in the initial state: valid terminator. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (NULL, "", 1, &s);
  assert (ret == 0);
  assert (mbsinit (&s));

  /* NUL byte after one byte of a three-byte character: invalid. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (NULL, "", 1, &s);
  assert (ret == (size_t) -1);

  /* NUL byte after two bytes of a three-byte character: invalid. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (&wc, "\x89", 1, &s);
  assert (ret == (size_t) -2); /* 2nd byte processed */
  ret = mbrtowc (NULL, "", 1, &s);
  assert (ret == (size_t) -1);

  /* NUL byte after a complete three-byte character: valid terminator. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (&wc, "\x89", 1, &s);
  assert (ret == (size_t) -2); /* 2nd byte processed */
  ret = mbrtowc (&wc, "\xA0", 1, &s);
  assert (ret == 1); /* 3rd byte processed */
  ret = mbrtowc (NULL, "", 1, &s);
  assert (ret == 0);
  assert (mbsinit (&s));
}
/* Same scenarios as the empty-string NUL test, but the terminator is
   requested through the shorthand mbrtowc (NULL, NULL, 0, ps), which
   ISO C 99 defines as equivalent to mbrtowc (NULL, "", 1, ps).  The call
   must return 0 and leave the state initial when no partial character is
   pending, and return (size_t) -1 otherwise.

   The caller must have selected a UTF-8 LC_CTYPE locale beforehand.
   Each mbrtowc() result is stored before being asserted so that the calls
   are still executed when the program is built with NDEBUG.  */
void test2 (void)
{
  wchar_t wc = 42; /* arbitrary number; only serves as an output slot */
  mbstate_t s;
  size_t ret;

  /* NULL string in the initial state: valid terminator. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (NULL, NULL, 0, &s);
  assert (ret == 0);
  assert (mbsinit (&s));

  /* NULL string after one byte of a three-byte character: invalid. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (NULL, NULL, 0, &s);
  assert (ret == (size_t) -1);

  /* NULL string after two bytes of a three-byte character: invalid. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (&wc, "\x89", 1, &s);
  assert (ret == (size_t) -2); /* 2nd byte processed */
  ret = mbrtowc (NULL, NULL, 0, &s);
  assert (ret == (size_t) -1);

  /* NULL string after a complete three-byte character: valid terminator. */
  memset (&s, 0, sizeof (s)); /* get s into initial state */
  ret = mbrtowc (&wc, "\xE2", 1, &s);
  assert (ret == (size_t) -2); /* 1st byte processed */
  ret = mbrtowc (&wc, "\x89", 1, &s);
  assert (ret == (size_t) -2); /* 2nd byte processed */
  ret = mbrtowc (&wc, "\xA0", 1, &s);
  assert (ret == 1); /* 3rd byte processed */
  ret = mbrtowc (NULL, NULL, 0, &s);
  assert (ret == 0);
  assert (mbsinit (&s));
}
/* Entry point: select the de_DE.UTF-8 LC_CTYPE locale and run both
   single-byte-feeding tests.  Exits with status 1 and a message on stderr
   if the locale cannot be selected; returns 0 when both tests pass (a
   failed assertion aborts the program).  */
int main (void)
{
  /* UTF-8 single byte feeding test for mbrtowc(). */
  const char *locale = "de_DE.UTF-8";

  if (!setlocale (LC_CTYPE, locale))
    {
      fprintf (stderr, "locale '%s' not available!\n", locale);
      exit (1);
    }

  test1 ();
  test2 ();
  return 0;
}
===============================================================================
The testsuite also has to be changed, because it assumes that
mbrlen (NULL, 0, NULL) will reset mbrlen's hidden state - which is not
true: when mbrlen has accumulated a partial/incomplete multibyte character,
mbrlen (NULL, 0, NULL) must return (size_t) -1 with errno set to EILSEQ,
and its state afterwards is unspecified. (See ISO C 99 sections 7.24.6.3.1
and 7.24.6.3.2.)
ChangeLog:
2001-05-21 Bruno Haible <haible@clisp.cons.org>
* wcsmbs/mbrtowc.c (mbrtowc): Remove local variable 'flush', always
use 0 instead, and rely on the converter to do the flush.
* wcsmbs/tst-mbrtowc.c (utf8_test_1): New function, taken from
utf8_test.
(utf8_test_2, utf8_test_3): New function.
(utf8_test): Call utf8_test_1, utf8_test_2, utf8_test_3.
localedata/ChangeLog:
2001-05-21 Bruno Haible <haible@clisp.cons.org>
* tests-mbwc/dat_mbrlen.c (tst_mbrlen_loc): Use a private mbstate_t
for some tests, because mbrlen (NULL, 0, &s) does not always put back
s into the initial state.
--- glibc-20010430/wcsmbs/mbrtowc.c.bak Mon Apr 9 16:51:43 2001
+++ glibc-20010430/wcsmbs/mbrtowc.c Mon May 21 12:31:06 2001
@@ -42,7 +42,6 @@
size_t dummy;
const unsigned char *inbuf;
char *outbuf = (char *) (pwc ?: buf);
- int flush = 0;
/* Set information for this step. */
data.__invocation_counter = 0;
@@ -58,7 +57,6 @@
outbuf = (char *) buf;
s = "";
n = 1;
- flush = 1;
}
/* Tell where we want the result. */
@@ -72,7 +70,7 @@
inbuf = (const unsigned char *) s;
status = DL_CALL_FCT (__wcsmbs_gconv_fcts.towc->__fct,
(__wcsmbs_gconv_fcts.towc, &data, &inbuf, inbuf + n,
- NULL, &dummy, flush, 1));
+ NULL, &dummy, 0, 1));
/* There must not be any problems with the conversion but illegal input
characters. The output buffer must be large enough, otherwise the
--- glibc-20010430/wcsmbs/tst-mbrtowc.c.bak Thu Jan 11 22:01:06 2001
+++ glibc-20010430/wcsmbs/tst-mbrtowc.c Mon May 21 12:29:06 2001
@@ -27,24 +27,18 @@
static int check_ascii (const char *locname);
-/* Test for mbrtowc, contributed by Markus Kuhn <mkuhn@acm.org>. */
+/* UTF-8 single byte feeding test for mbrtowc(),
+ contributed by Markus Kuhn <mkuhn@acm.org>. */
static int
-utf8_test (void)
+utf8_test_1 (void)
{
- /* UTF-8 single byte feeding test for mbrtowc(). */
wchar_t wc;
mbstate_t s;
- const char *locale = "de_DE.UTF-8";
- if (!setlocale (LC_CTYPE, locale))
- {
- fprintf (stderr, "locale '%s' not available!\n", locale);
- exit (1);
- }
wc = 42; /* arbitrary number */
memset (&s, 0, sizeof (s)); /* get s into initial state */
- assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) - 2); /* 1st byte processed */
- assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) - 2); /* 2nd byte processed */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
assert (wc == 42); /* no value has not been stored into &wc yet */
assert (mbrtowc (&wc, "\xA0", 1, &s) == 1); /* 3nd byte processed */
assert (wc == 0x2260); /* E2 89 A0 = U+2260 (not equal) decoded correctly */
@@ -54,6 +48,93 @@
return 0;
}
+/* Test for NUL byte processing via empty string. */
+static int
+utf8_test_2 (void)
+{
+ wchar_t wc;
+ mbstate_t s;
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (NULL, "", 1, &s) == 0); /* valid terminator */
+ assert (mbsinit (&s));
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1); /* invalid terminator */
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
+ assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1); /* invalid terminator */
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
+ assert (mbrtowc (&wc, "\xA0", 1, &s) == 1); /* 3nd byte processed */
+ assert (mbrtowc (NULL, "", 1, &s) == 0); /* valid terminator */
+ assert (mbsinit (&s));
+
+ return 0;
+}
+
+/* Test for NUL byte processing via NULL string. */
+static int
+utf8_test_3 (void)
+{
+ wchar_t wc;
+ mbstate_t s;
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (NULL, NULL, 0, &s) == 0); /* valid terminator */
+ assert (mbsinit (&s));
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
+ assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */
+
+ wc = 42; /* arbitrary number */
+ memset (&s, 0, sizeof (s)); /* get s into initial state */
+ assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
+ assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
+ assert (mbrtowc (&wc, "\xA0", 1, &s) == 1); /* 3nd byte processed */
+ assert (mbrtowc (NULL, NULL, 0, &s) == 0); /* valid terminator */
+ assert (mbsinit (&s));
+
+ return 0;
+}
+
+static int
+utf8_test (void)
+{
+ const char *locale = "de_DE.UTF-8";
+ int error = 0;
+
+ if (!setlocale (LC_CTYPE, locale))
+ {
+ fprintf (stderr, "locale '%s' not available!\n", locale);
+ exit (1);
+ }
+
+ error |= utf8_test_1 ();
+ error |= utf8_test_2 ();
+ error |= utf8_test_3 ();
+
+ return error;
+}
+
int
main (void)
--- glibc-20010430/localedata/tests-mbwc/dat_mbrlen.c.bak Fri Aug 18 18:40:52 2000
+++ glibc-20010430/localedata/tests-mbwc/dat_mbrlen.c Tue May 22 00:33:21 2001
@@ -99,21 +99,15 @@
{ /*----------------- #01 -----------------*/
{
{
- { 1, "\317\302", 1, 0, 0 },
-#ifdef SHOJI_IS_RIGHT
- { 0, "", 0, 0, 0 },
-#else
- /* XXX This test depends on the internal state being empty.
- XXX Therefore we must explicitly clean it. */
- { 0, "", 0, 0, 1 },
-#endif
- { 1, "\317\302", USE_MBCURMAX, 0, 0 },
+ { 1, "\317\302", 1, 1, 1 },
+ { 0, "", 0, 1, 0 },
+ { 1, "\317\302", USE_MBCURMAX, 1, 1 },
}
},
{
{
{ 0, 1, -2, },
- { 0, 1, 0, },
+ { 0, 1, -1, },
{ 0, 1, 2, },
}
}