This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
bugs in ISO-2022-JP-2 converter
- To: libc-alpha at sources dot redhat dot com
- Subject: bugs in ISO-2022-JP-2 converter
- From: Bruno Haible <haible at ilog dot fr>
- Date: Wed, 20 Sep 2000 15:35:19 +0200 (CEST)
The ISO-2022-JP-2 converter has three bugs:
- An off-by-one invalid array access to iso88597_to_ucs4[].
- U+00A0 can in some cases be converted to ESC N 0xA0, which is not 7-bit.
- The Greek alphabet is incorrectly converted, because iso8859-7jp.h is
incorrectly generated (the sed script looks for uppercase hexadecimal
digits in the charmap, but they are now lowercase).
This patch fixes them.
2000-09-20 Bruno Haible <haible@clisp.cons.org>
* iconvdata/iso-2022-jp.c (BODY for FROM_LOOP): Reject 0x80.
(BODY for TO_LOOP): Clear bit 7 when outputting ISO-8859-1 upper half
characters.
* iconvdata/gen-8bit-gap-1.sh: Recognize lowercase hexadecimal digits
as equivalent to uppercase hexadecimal digits.
*** glibc-20000914/iconvdata/iso-2022-jp.c.bak Wed Aug 30 23:43:54 2000
--- glibc-20000914/iconvdata/iso-2022-jp.c Wed Sep 20 00:39:10 2000
***************
*** 359,365 ****
else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
{ \
/* We use the table from the ISO 8859-7 module. */ \
! if (inptr[2] < 0x20 || inptr[2] > 0x80) \
{ \
if (! ignore_errors_p ()) \
{ \
--- 359,365 ----
else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
{ \
/* We use the table from the ISO 8859-7 module. */ \
! if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
{ \
if (! ignore_errors_p ()) \
{ \
***************
*** 800,806 ****
} \
*outptr++ = ESC; \
*outptr++ = 'N'; \
! *outptr++ = ch; \
} \
else \
{ \
--- 800,806 ----
} \
*outptr++ = ESC; \
*outptr++ = 'N'; \
! *outptr++ = ch - 0x80; \
} \
else \
{ \
*** glibc-20000914/iconvdata/gen-8bit-gap-1.sh.bak Mon Jul 3 16:39:27 2000
--- glibc-20000914/iconvdata/gen-8bit-gap-1.sh Tue Sep 19 12:53:06 2000
***************
*** 1,15 ****
#! /bin/sh
echo "static const uint32_t iso88597_to_ucs4[96] = {"
sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d' \
! -e 's/^<U\(....\)>[[:space:]]*.x\([A-F].\).*/ [0x\2 - 0xA0] = 0x\1,/p' \
"$@" | sort -u
echo "};"
echo "static const struct gap from_idx[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x[A-F]..*/\1/p' \
"$@" | sort -u | $AWK -f gap.awk
echo " { start: 0xffff, end: 0xffff, idx: 0 }"
echo "};"
echo "static const char iso88597_from_ucs4[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x\([A-F].\).*/0x\1 0x\2/p' \
"$@" | sort -u | $AWK -f gaptab.awk
echo "};"
--- 1,15 ----
#! /bin/sh
echo "static const uint32_t iso88597_to_ucs4[96] = {"
sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d' \
! -e 's/^<U\(....\)>[[:space:]]*.x\([A-Fa-f].\).*/ [0x\2 - 0xA0] = 0x\1,/p' \
"$@" | sort -u
echo "};"
echo "static const struct gap from_idx[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x[A-Fa-f]..*/\1/p' \
"$@" | sort -u | $AWK -f gap.awk
echo " { start: 0xffff, end: 0xffff, idx: 0 }"
echo "};"
echo "static const char iso88597_from_ucs4[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x\([A-Fa-f].\).*/0x\1 0x\2/p' \
"$@" | sort -u | $AWK -f gaptab.awk
echo "};"