This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Rename some "CPxxx" to "IBMxxx" (was Re: KOI8 character sets)


On Aug 25 19:39, Andy Koppe wrote:
> 2009/8/25 Jeff Johnston:
> > The fix to sb_charsets.c is correct but the old comment was fine.
> 
> Ah, you're right.
> 
> > Can the CP numbers be used directly?  You set the charset for KOI8 to
> > one of two CP values.  Can a user specify those numbers directly as
> > they can for example, CP1258?
> 
> I'd considered that but I think that "CP20866" and "CP21866" should
> not be supported, because "KOI8-R" and "KOI8-U" are the official
> IANA-registered names and the CP names likely wouldn't be recognised
> when connecting to Unix hosts. Also, the CP equivalents of other
> charsets, e.g. "CP936" for "GBK", aren't supported either.

Btw., we should also replace the "CPxxx" specifiers for some of the
charsets with "IBMxxx" as they are known on Linux.  That's the case for
codepages 437, 850, 852, 855, 857, 858, 862, 866, 874.

Patch attached.


Thanks,
Corinna


	* libc/locale/locale.c (loadlocale): Convert some "CPxxx" charset
	names to "IBMxxx" charset names as on Linux.  Fix documentation
	accordingly.


Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.25
diff -u -p -r1.25 locale.c
--- libc/locale/locale.c	25 Aug 2009 18:47:24 -0000	1.25
+++ libc/locale/locale.c	25 Aug 2009 19:41:21 -0000
@@ -57,15 +57,16 @@ country code per ISO 3166.  For <<"chars
 Additionally to the POSIX specifier, seven extensions are supported for
 backward compatibility with older implementations using newlib:
 <<"C-UTF-8">>, <<"C-JIS">>, <<"C-eucJP">>, <<"C-SJIS">>, <<C-KOI8-R>>,
-<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, or <<"C-CPxxx">> with
-xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125,
-1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+<<C-KOI8-U>>, <<"C-ISO-8859-x">> with 1 <= x <= 15, <<"C-CPxxx">> with
+xxx in [720, 737, 775, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
+1257, 1258], or <<"C-IBMxxx">> with xxx in [437, 850, 852, 855, 857,
+858, 862, 866, 874].
 
 Even when using POSIX locale strings, the only charsets allowed are
 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<KOI8-R>>, <<KOI8-U>>,
-<<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
-[437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125, 1250,
-1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
+<<"ISO-8859-x">> with 1 <= x <= 15, <<"CPxxx">> with xxx in [720, 737,
+775, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258], or
+<<"IBMxxx">> with xxx in [437, 850, 852, 855, 857, 858, 862, 866, 874].
 Charsets are case insensitive.  For instance, <<"EUCJP">> and <<"eucJP">>
 are equivalent.  <<"UTF-8">> can also be written without dash, as in
 <<"UTF8">> or <<"utf8">>.
@@ -552,22 +553,58 @@ loadlocale(struct _reent *p, int categor
     case 'i':
       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
          ISO-8859-12. */
-      if (strncasecmp (charset, "ISO-8859-", 9))
-	return NULL;
-      strncpy (charset, "ISO", 3);
-      val = _strtol_r (p, charset + 9, &end, 10);
-      if (val < 1 || val > 16 || val == 12 || *end)
-	return NULL;
-      mbc_max = 1;
+      if (!strncasecmp (charset, "ISO-8859-", 9))
+	{
+	  strncpy (charset, "ISO", 3);
+	  val = _strtol_r (p, charset + 9, &end, 10);
+	  if (val < 1 || val > 16 || val == 12 || *end)
+	    return NULL;
+	  mbc_max = 1;
 #ifdef _MB_CAPABLE
 #ifdef _MB_EXTENDED_CHARSETS_ISO
-      l_wctomb = __iso_wctomb;
-      l_mbtowc = __iso_mbtowc;
+	  l_wctomb = __iso_wctomb;
+	  l_mbtowc = __iso_mbtowc;
 #else /* !_MB_EXTENDED_CHARSETS_ISO */
-      l_wctomb = __ascii_wctomb;
-      l_mbtowc = __ascii_mbtowc;
+	  l_wctomb = __ascii_wctomb;
+	  l_mbtowc = __ascii_mbtowc;
 #endif /* _MB_EXTENDED_CHARSETS_ISO */
 #endif
+	}
+      else if (!strncasecmp (charset, "IBM", 3))
+	{
+	  strncpy (charset, "CP", 2);
+	  memmove (charset + 2, charset + 3, strlen (charset + 3) + 1);
+	  val = _strtol_r (p, charset + 2, &end, 10);
+	  if (*end)
+	    return NULL;
+	  switch (val)
+	    {
+	    case 437:
+	    case 850:
+	    case 852:
+	    case 855:
+	    case 857:
+	    case 858:
+	    case 862:
+	    case 866:
+	    case 874:
+	      mbc_max = 1;
+#ifdef _MB_CAPABLE
+#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
+	      l_wctomb = __cp_wctomb;
+	      l_mbtowc = __cp_mbtowc;
+#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
+	      l_wctomb = __ascii_wctomb;
+	      l_mbtowc = __ascii_mbtowc;
+#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
+#endif
+	      break;
+	    default:
+	      return NULL;
+	    }
+	}
+      else
+	return NULL;
     break;
     case 'C':
     case 'c':
@@ -579,18 +616,9 @@ loadlocale(struct _reent *p, int categor
 	return NULL;
       switch (val)
 	{
-	case 437:
 	case 720:
 	case 737:
 	case 775:
-	case 850:
-	case 852:
-	case 855:
-	case 857:
-	case 858:
-	case 862:
-	case 866:
-	case 874:
 	case 1125:
 	case 1250:
 	case 1251:

-- 
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]