This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
bug in ISO-2022-JP-2 converter
- From: Bruno Haible <bruno at clisp dot org>
- To: libc-alpha at sources dot redhat dot com
- Date: Mon, 27 May 2002 18:45:51 +0200 (CEST)
- Subject: bug in ISO-2022-JP-2 converter
The converter from UCS-4 to ISO-2022-JP-2 crashes when it is fed characters
outside the BMP (code points above U+FFFF). Example:
========================================================
#include <iconv.h>
/* Minimal reproducer: convert a single character outside the BMP from
   WCHAR_T to ISO-2022-JP-2.  The results of iconv_open and iconv are
   not checked.  */
int main ()
{
/* iconv_open takes the target encoding first, then the source encoding.  */
iconv_t cd = iconv_open ("ISO-2022-JP-2", "WCHAR_T");
/* 0x10000 is the first code point beyond the BMP.
   NOTE(review): assumes wchar_t has the same size as unsigned int -- confirm
   for the target platform.  */
unsigned int ch = 0x10000;
unsigned char buf[8];
/* Input cursor and remaining byte count: the raw bytes of ch.  */
const char *inptr = (const char*)&ch;
size_t insize = sizeof (ch);
/* Output cursor and remaining space: the 8-byte buffer above.  */
char *outptr = (char*)&buf[0];
size_t outsize = sizeof (buf);
/* According to the report, this conversion call is where the crash occurs.  */
iconv (cd, &inptr, &insize, &outptr, &outsize);
return 0;
}
=========================================================
The reason is a loop
while (ch > rp->end)
++rp;
which never checks whether rp has reached the end of the table it points
into, so for a large enough ch it keeps advancing past the last entry.
Here is a fix.
2002-05-26 Bruno Haible <bruno@clisp.org>
* iconvdata/iso-2022-jp.c (BODY for TO_LOOP): Avoid running off the
end of the ISO-8859-7 from_idx table.
--- glibc-20020425/iconvdata/iso-2022-jp.c.bak Sat May 11 17:25:32 2002
+++ glibc-20020425/iconvdata/iso-2022-jp.c Sun May 26 01:45:32 2002
@@ -708,25 +708,29 @@
} \
else if (set2 == ISO88597_set) \
{ \
- const struct gap *rp = from_idx; \
- \
- while (ch > rp->end) \
- ++rp; \
- if (ch >= rp->start) \
+ if (__builtin_expect (ch < 0xffff, 1)) \
{ \
- unsigned char res = iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
- if (res != '\0') \
+ const struct gap *rp = from_idx; \
+ \
+ while (ch > rp->end) \
+ ++rp; \
+ if (ch >= rp->start) \
{ \
- if (__builtin_expect (outptr + 3 > outend, 0)) \
+ unsigned char res = \
+ iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
+ if (res != '\0') \
{ \
- result = __GCONV_FULL_OUTPUT; \
- break; \
- } \
+ if (__builtin_expect (outptr + 3 > outend, 0)) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
\
- *outptr++ = ESC; \
- *outptr++ = 'N'; \
- *outptr++ = res; \
- written = 3; \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
+ *outptr++ = res; \
+ written = 3; \
+ } \
} \
} \
} \
@@ -817,43 +821,45 @@
} \
\
/* Try ISO 8859-7 upper half. */ \
- { \
- const struct gap *rp = from_idx; \
+ if (__builtin_expect (ch < 0xffff, 1)) \
+ { \
+ const struct gap *rp = from_idx; \
\
- while (ch > rp->end) \
- ++rp; \
- if (ch >= rp->start) \
- { \
- unsigned char res = \
- iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
- if (res != '\0') \
- { \
- if (set2 != ISO88597_set) \
- { \
- if (__builtin_expect (outptr + 3 > outend, 0))\
- { \
- result = __GCONV_FULL_OUTPUT; \
- break; \
- } \
- *outptr++ = ESC; \
- *outptr++ = '.'; \
- *outptr++ = 'F'; \
- set2 = ISO88597_set; \
- } \
- \
- if (__builtin_expect (outptr + 3 > outend, 0)) \
- { \
- result = __GCONV_FULL_OUTPUT; \
- break; \
- } \
- *outptr++ = ESC; \
- *outptr++ = 'N'; \
- *outptr++ = res; \
- result = __GCONV_OK; \
- break; \
- } \
- } \
- } \
+ while (ch > rp->end) \
+ ++rp; \
+ if (ch >= rp->start) \
+ { \
+ unsigned char res = \
+ iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
+ if (res != '\0') \
+ { \
+ if (set2 != ISO88597_set) \
+ { \
+ if (__builtin_expect (outptr + 3 > outend, \
+ 0)) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ *outptr++ = ESC; \
+ *outptr++ = '.'; \
+ *outptr++ = 'F'; \
+ set2 = ISO88597_set; \
+ } \
+ \
+ if (__builtin_expect (outptr + 3 > outend, 0)) \
+ { \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
+ } \
+ *outptr++ = ESC; \
+ *outptr++ = 'N'; \
+ *outptr++ = res; \
+ result = __GCONV_OK; \
+ break; \
+ } \
+ } \
+ } \
\
break; \
\