This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

bugs in ISO-2022-JP-2 converter



The ISO-2022-JP-2 converter has three bugs:

- An off-by-one out-of-bounds array access to iso88597_to_ucs4[] (the input
  byte 0x80 is not rejected).
- U+00A0 can in some cases be converted to ESC N 0xA0, which is not 7-bit.
- The Greek alphabet is incorrectly converted, because iso8859-7jp.h is
  incorrectly generated (the sed script looks for uppercase hexadecimal
  digits in the charmap, but the charmap now uses lowercase digits).

This patch fixes them.


2000-09-20  Bruno Haible  <haible@clisp.cons.org>

	* iconvdata/iso-2022-jp.c (BODY for FROM_LOOP): Reject 0x80.
	(BODY for TO_LOOP): Clear bit 7 when outputting ISO-8859-1 upper half
	characters.
	* iconvdata/gen-8bit-gap-1.sh: Recognize lowercase hexadecimal digits
	as equivalent to uppercase hexadecimal digits.

*** glibc-20000914/iconvdata/iso-2022-jp.c.bak	Wed Aug 30 23:43:54 2000
--- glibc-20000914/iconvdata/iso-2022-jp.c	Wed Sep 20 00:39:10 2000
***************
*** 359,365 ****
  	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)	      \
  	  {								      \
  	    /* We use the table from the ISO 8859-7 module.  */		      \
! 	    if (inptr[2] < 0x20 || inptr[2] > 0x80)			      \
  	      {								      \
  		if (! ignore_errors_p ())				      \
  		  {							      \
--- 359,365 ----
  	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)	      \
  	  {								      \
  	    /* We use the table from the ISO 8859-7 module.  */		      \
! 	    if (inptr[2] < 0x20 || inptr[2] >= 0x80)			      \
  	      {								      \
  		if (! ignore_errors_p ())				      \
  		  {							      \
***************
*** 800,806 ****
  			      }						      \
  			    *outptr++ = ESC;				      \
  			    *outptr++ = 'N';				      \
! 			    *outptr++ = ch;				      \
  			  }						      \
  			else						      \
  			  {						      \
--- 800,806 ----
  			      }						      \
  			    *outptr++ = ESC;				      \
  			    *outptr++ = 'N';				      \
! 			    *outptr++ = ch - 0x80;			      \
  			  }						      \
  			else						      \
  			  {						      \
*** glibc-20000914/iconvdata/gen-8bit-gap-1.sh.bak	Mon Jul  3 16:39:27 2000
--- glibc-20000914/iconvdata/gen-8bit-gap-1.sh	Tue Sep 19 12:53:06 2000
***************
*** 1,15 ****
  #! /bin/sh
  echo "static const uint32_t iso88597_to_ucs4[96] = {"
  sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d' \
!     -e 's/^<U\(....\)>[[:space:]]*.x\([A-F].\).*/  [0x\2 - 0xA0] = 0x\1,/p' \
      "$@" | sort -u
  echo "};"
  echo "static const struct gap from_idx[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x[A-F]..*/\1/p' \
      "$@" | sort -u | $AWK -f gap.awk
  echo "  { start: 0xffff, end: 0xffff, idx:     0 }"
  echo "};"
  echo "static const char iso88597_from_ucs4[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x\([A-F].\).*/0x\1 0x\2/p' \
      "$@" | sort -u | $AWK -f gaptab.awk
  echo "};"
--- 1,15 ----
  #! /bin/sh
  echo "static const uint32_t iso88597_to_ucs4[96] = {"
  sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d' \
!     -e 's/^<U\(....\)>[[:space:]]*.x\([A-Fa-f].\).*/  [0x\2 - 0xA0] = 0x\1,/p' \
      "$@" | sort -u
  echo "};"
  echo "static const struct gap from_idx[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x[A-Fa-f]..*/\1/p' \
      "$@" | sort -u | $AWK -f gap.awk
  echo "  { start: 0xffff, end: 0xffff, idx:     0 }"
  echo "};"
  echo "static const char iso88597_from_ucs4[] = {"
! sed -ne 's/^<U\(....\)>[[:space:]]*.x\([A-Fa-f].\).*/0x\1 0x\2/p' \
      "$@" | sort -u | $AWK -f gaptab.awk
  echo "};"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]