This is the mail archive of the cygwin mailing list for the Cygwin project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Bug in libiconv?


On 1/28/2011 5:12 PM, Bruno Haible wrote:
> Please CC the bug-gnu-libiconv mailing list when discussing possible
> bugs in GNU libiconv.

I hadn't intended on involving bug-gnu-libiconv until we had a working
fix, and a consensus here on @cygwin.  But, in any case, here is the
portion of Corinna's patch dealing with the iconv issues, stripped down
to the minimum necessary to correct the "problem".

As pointed out in the @cygwin thread, there are still some open
questions, which I had hoped to avoid by waiting until cygwin-1.7.8 was
released.

1) On cygwin-1.7.8, __STDC_ISO_10646__ is defined, so this change will
   allow "correct" behavior *if compiled on cygwin-1.7.8*.

-#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) &&
!defined __CYGWIN__)
+#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__

   But cygwin-1.7.8 isn't out yet. But with this change (and the "don't
include windows.h" change) then libiconv will still compile properly on
cygwin-1.5 -- which does not support wide chars, and does NOT define
__STDC_ISO_10646__.  However, it WON'T compile properly on cygwin-1.7.x
up to 1.7.7.

2) From cygwin-1.7.2 to cygwin-1.7.7, the following change could be
   used instead (there's an issue with 1.7.1 which doesn't bear
   exploration here):

-#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) &&
!defined __CYGWIN__)
+#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ ||
defined __CYGWIN__

   But arguably, then it would break on "old" cygwin like 1.5.  Perhaps
this is ok, since 1.7 has been "out" for over a year, and maybe
bug-gnu-libiconv doesn't care about old,
unsupported-by-the-cygwin-project versions of cygwin.



In any case, the attached patch goes with option 1 above.  It is
completely orthogonal to, and independent of, the other "relocation"
patch, that I posted to the gnulib list.


2010-01-28  Corinna Vinschen  <...>

	Correct wchar handling on cygwin-1.7.x
	* lib/iconv.c (iconv_canonicalize): Allow __STDC_ISO_10646__
	to control, rather than using __CYGWIN__ to veto.
	* lib/iconv_open1.h: Ditto.
	* libcharset/lib/localcharset.c: Don't include windows.h if
	__CYGWIN__.
	(get_charset_aliases): Remove cygwin workaround; rely on generic
	implementation. Be sure to copy result of nl_langinfo into local
	buffer.

--
Chuck
--- libiconv-1.13.1.orig/lib/iconv.c	2009-06-21 13:17:33.000000000 +0200
+++ libiconv-1.13.1/lib/iconv.c	2011-01-27 12:46:21.544296281 +0100
@@ -550,7 +550,7 @@ const char * iconv_canonicalize (const c
     if (ap->encoding_index == ei_local_wchar_t) {
       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
          This is also the case on native Woe32 systems.  */
-#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
+#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__
       if (sizeof(wchar_t) == 4) {
         index = ei_ucs4internal;
         break;
--- libiconv-1.13.1.orig/lib/iconv_open1.h	2009-06-21 13:17:33.000000000 +0200
+++ libiconv-1.13.1/lib/iconv_open1.h	2011-01-27 12:47:03.119371056 +0100
@@ -98,7 +98,7 @@
     if (ap->encoding_index == ei_local_wchar_t) {
       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
          This is also the case on native Woe32 systems.  */
-#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
+#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__
       if (sizeof(wchar_t) == 4) {
         to_index = ei_ucs4internal;
         break;
@@ -174,7 +174,7 @@
     if (ap->encoding_index == ei_local_wchar_t) {
       /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
          This is also the case on native Woe32 systems.  */
-#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__)
+#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__
       if (sizeof(wchar_t) == 4) {
         from_index = ei_ucs4internal;
         break;
--- libiconv-1.13.1.orig/libcharset/lib/localcharset.c	2009-06-21 13:17:33.000000000 +0200
+++ libiconv-1.13.1/libcharset/lib/localcharset.c	2011-01-27 11:53:33.201852883 +0100
@@ -52,10 +52,6 @@
 #   include <locale.h>
 #  endif
 # endif
-# ifdef __CYGWIN__
-#  define WIN32_LEAN_AND_MEAN
-#  include <windows.h>
-# endif
 #elif defined WIN32_NATIVE
 # define WIN32_LEAN_AND_MEAN
 # include <windows.h>
@@ -117,7 +113,7 @@ get_charset_aliases (void)
   cp = charset_aliases;
   if (cp == NULL)
     {
-#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
+#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE)
       FILE *fp;
       const char *dir;
       const char *base = "charset.alias";
@@ -276,7 +272,7 @@ get_charset_aliases (void)
 	   "DECKOREAN" "\0" "EUC-KR" "\0";
 # endif
 
-# if defined WIN32_NATIVE || defined __CYGWIN__
+# if defined WIN32_NATIVE
       /* To avoid the troubles of installing a separate file in the same
 	 directory as the DLL and of retrieving the DLL's directory at
 	 runtime, simply inline the aliases here.  */
@@ -332,55 +328,14 @@ locale_charset (void)
 
 # if HAVE_LANGINFO_CODESET
 
-  /* Most systems support nl_langinfo (CODESET) nowadays.  */
-  codeset = nl_langinfo (CODESET);
-
-#  ifdef __CYGWIN__
-  /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
-     returns "US-ASCII".  As long as this is not fixed, return the suffix
-     of the locale name from the environment variables (if present) or
-     the codepage as a number.  */
-  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
-    {
-      const char *locale;
-      static char buf[2 + 10 + 1];
+  /* Most systems support nl_langinfo (CODESET) nowadays.
+  
+     POSIX allows that the returned pointer may point to a static area that
+     may be overwritten by subsequent calls to setlocale or nl_langinfo. */
+  static char codeset_buf[64];
 
-      locale = getenv ("LC_ALL");
-      if (locale == NULL || locale[0] == '\0')
-	{
-	  locale = getenv ("LC_CTYPE");
-	  if (locale == NULL || locale[0] == '\0')
-	    locale = getenv ("LANG");
-	}
-      if (locale != NULL && locale[0] != '\0')
-	{
-	  /* If the locale name contains an encoding after the dot, return
-	     it.  */
-	  const char *dot = strchr (locale, '.');
-
-	  if (dot != NULL)
-	    {
-	      const char *modifier;
-
-	      dot++;
-	      /* Look for the possible @... trailer and remove it, if any.  */
-	      modifier = strchr (dot, '@');
-	      if (modifier == NULL)
-		return dot;
-	      if (modifier - dot < sizeof (buf))
-		{
-		  memcpy (buf, dot, modifier - dot);
-		  buf [modifier - dot] = '\0';
-		  return buf;
-		}
-	    }
-	}
-
-      /* Woe32 has a function returning the locale's codepage as a number.  */
-      sprintf (buf, "CP%u", GetACP ());
-      codeset = buf;
-    }
-#  endif
+  codeset_buf[0] = '\0';
+  codeset = strncat (codeset_buf, nl_langinfo (CODESET), sizeof (codeset_buf));
 
 # else
 
--
Problem reports:       http://cygwin.com/problems.html
FAQ:                   http://cygwin.com/faq/
Documentation:         http://cygwin.com/docs.html
Unsubscribe info:      http://cygwin.com/ml/#unsubscribe-simple

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]