This is the mail archive of the binutils@sources.redhat.com mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: PATCH: binutils + i18n


Here is the promised patch to get rid of the locale dependency of case
conversion in regcomp/regexec calls.

The same patch is applied three times, once to each of three files that
are marked as "generated by CGEN". It would be a good idea to apply this
patch to CGEN itself as well, but I don't know where its sources reside.


opcodes/ChangeLog:
2001-09-22  Bruno Haible  <haible@clisp.cons.org>

	* fr30-asm.c (fr30_cgen_build_insn_regex): Generate a case sensitive
	regular expression.
	* openrisc-asm.c (openrisc_cgen_build_insn_regex): Likewise.
	* m32r-asm.c (m32r_cgen_build_insn_regex): Likewise.

*** binutils-010912/opcodes/fr30-asm.c.bak	Sat Sep 15 17:08:26 2001
--- binutils-010912/opcodes/fr30-asm.c	Sat Sep 22 01:25:18 2001
*************** fr30_cgen_build_insn_regex (insn)
*** 346,352 ****
  {  
    CGEN_OPCODE *opc = CGEN_INSN_OPCODE (insn);
    const char *mnem = CGEN_INSN_MNEMONIC (insn);
-   int mnem_len;
    char rxbuf[CGEN_MAX_RX_ELEMENTS];
    char *rx = rxbuf;
    const CGEN_SYNTAX_CHAR_TYPE *syn;
--- 346,351 ----
*************** fr30_cgen_build_insn_regex (insn)
*** 358,398 ****
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
    /* copy the literal mnemonic out of the insn */
!   memset (rx, 0, CGEN_MAX_RX_ELEMENTS);
!   mnem_len = strlen(mnem);
!   memcpy (rx, mnem, mnem_len);
!   rx += mnem_len;
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx < rxbuf + (CGEN_MAX_RX_ELEMENTS - 9); ++syn, ++rx) 
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	 char tmp = CGEN_SYNTAX_CHAR (* syn);
! 	 switch (tmp) 
!            {
! 	     /* escape any regex metacharacters in the syntax */
! 	   case '.': case '[': case '\\': 
! 	   case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	   case '?': case '{': case '}': 
! 	   case '(': case ')': case '*':
! 	   case '|': case '+': case ']':
  #endif
  
! 	     * rx++ = '\\';
! 	     break;  
! 	   }
! 	 /* insert syntax char into rx */
! 	* rx = tmp;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  * rx = '.';
! 	  * ++rx = '*';
  	}
      }
  
--- 357,424 ----
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
+   /* Generate a case sensitive regular expression that emulates case
+      insensitive matching in the "C" locale.  We cannot generate a case
+      insensitive regular expression because in Turkish locales, 'i' and 'I'
+      are not equal modulo case conversion.  */
+ 
    /* copy the literal mnemonic out of the insn */
!   for (; *mnem; mnem++)
!     {
!       char c = *mnem;
! 
!       if (ISALPHA (c))
! 	{
! 	  *rx++ = '[';
! 	  *rx++ = TOLOWER (c);
! 	  *rx++ = TOUPPER (c);
! 	  *rx++ = ']';
! 	}
!       else
! 	*rx++ = c;
!     }
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx <= rxbuf + (CGEN_MAX_RX_ELEMENTS - 7 - 4); ++syn)
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	  char c = CGEN_SYNTAX_CHAR (* syn);
! 	  switch (c) 
! 	    {
! 	      /* escape any regex metacharacters in the syntax */
! 	    case '.': case '[': case '\\': 
! 	    case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	    case '?': case '{': case '}': 
! 	    case '(': case ')': case '*':
! 	    case '|': case '+': case ']':
  #endif
  
! 	      *rx++ = '\\';
! 	      *rx++ = c;
! 	      break;
! 	    default:
! 	      if (ISALPHA (c))
! 		{
! 		  *rx++ = '[';
! 		  *rx++ = TOLOWER (c);
! 		  *rx++ = TOUPPER (c);
! 		  *rx++ = ']';
! 		}
! 	      else
! 		*rx++ = c;
! 	      break;
! 	    }
! 	  /* insert syntax char into rx */
! 	  *rx++ = c;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  *rx++ = '.';
! 	  *rx++ = '*';
  	}
      }
  
*************** fr30_cgen_build_insn_regex (insn)
*** 408,414 ****
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB|REG_ICASE);
  
    if (reg_err == 0) 
      return NULL;
--- 434,440 ----
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB);
  
    if (reg_err == 0) 
      return NULL;
*** binutils-010912/opcodes/openrisc-asm.c.bak	Sat Sep 15 17:12:27 2001
--- binutils-010912/opcodes/openrisc-asm.c	Sat Sep 22 01:25:22 2001
*************** openrisc_cgen_build_insn_regex (insn)
*** 278,284 ****
  {  
    CGEN_OPCODE *opc = CGEN_INSN_OPCODE (insn);
    const char *mnem = CGEN_INSN_MNEMONIC (insn);
-   int mnem_len;
    char rxbuf[CGEN_MAX_RX_ELEMENTS];
    char *rx = rxbuf;
    const CGEN_SYNTAX_CHAR_TYPE *syn;
--- 278,283 ----
*************** openrisc_cgen_build_insn_regex (insn)
*** 290,330 ****
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
    /* copy the literal mnemonic out of the insn */
!   memset (rx, 0, CGEN_MAX_RX_ELEMENTS);
!   mnem_len = strlen(mnem);
!   memcpy (rx, mnem, mnem_len);
!   rx += mnem_len;
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx < rxbuf + (CGEN_MAX_RX_ELEMENTS - 9); ++syn, ++rx) 
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	 char tmp = CGEN_SYNTAX_CHAR (* syn);
! 	 switch (tmp) 
!            {
! 	     /* escape any regex metacharacters in the syntax */
! 	   case '.': case '[': case '\\': 
! 	   case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	   case '?': case '{': case '}': 
! 	   case '(': case ')': case '*':
! 	   case '|': case '+': case ']':
  #endif
  
! 	     * rx++ = '\\';
! 	     break;  
! 	   }
! 	 /* insert syntax char into rx */
! 	* rx = tmp;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  * rx = '.';
! 	  * ++rx = '*';
  	}
      }
  
--- 289,356 ----
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
+   /* Generate a case sensitive regular expression that emulates case
+      insensitive matching in the "C" locale.  We cannot generate a case
+      insensitive regular expression because in Turkish locales, 'i' and 'I'
+      are not equal modulo case conversion.  */
+ 
    /* copy the literal mnemonic out of the insn */
!   for (; *mnem; mnem++)
!     {
!       char c = *mnem;
! 
!       if (ISALPHA (c))
! 	{
! 	  *rx++ = '[';
! 	  *rx++ = TOLOWER (c);
! 	  *rx++ = TOUPPER (c);
! 	  *rx++ = ']';
! 	}
!       else
! 	*rx++ = c;
!     }
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx <= rxbuf + (CGEN_MAX_RX_ELEMENTS - 7 - 4); ++syn)
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	  char c = CGEN_SYNTAX_CHAR (* syn);
! 	  switch (c) 
! 	    {
! 	      /* escape any regex metacharacters in the syntax */
! 	    case '.': case '[': case '\\': 
! 	    case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	    case '?': case '{': case '}': 
! 	    case '(': case ')': case '*':
! 	    case '|': case '+': case ']':
  #endif
  
! 	      *rx++ = '\\';
! 	      *rx++ = c;
! 	      break;
! 	    default:
! 	      if (ISALPHA (c))
! 		{
! 		  *rx++ = '[';
! 		  *rx++ = TOLOWER (c);
! 		  *rx++ = TOUPPER (c);
! 		  *rx++ = ']';
! 		}
! 	      else
! 		*rx++ = c;
! 	      break;
! 	    }
! 	  /* insert syntax char into rx */
! 	  *rx++ = c;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  *rx++ = '.';
! 	  *rx++ = '*';
  	}
      }
  
*************** openrisc_cgen_build_insn_regex (insn)
*** 340,346 ****
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB|REG_ICASE);
  
    if (reg_err == 0) 
      return NULL;
--- 366,372 ----
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB);
  
    if (reg_err == 0) 
      return NULL;
*** binutils-010912/opcodes/m32r-asm.c.bak	Sat Sep 15 17:09:32 2001
--- binutils-010912/opcodes/m32r-asm.c	Sat Sep 22 01:25:26 2001
*************** m32r_cgen_build_insn_regex (insn)
*** 354,360 ****
  {  
    CGEN_OPCODE *opc = CGEN_INSN_OPCODE (insn);
    const char *mnem = CGEN_INSN_MNEMONIC (insn);
-   int mnem_len;
    char rxbuf[CGEN_MAX_RX_ELEMENTS];
    char *rx = rxbuf;
    const CGEN_SYNTAX_CHAR_TYPE *syn;
--- 354,359 ----
*************** m32r_cgen_build_insn_regex (insn)
*** 366,406 ****
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
    /* copy the literal mnemonic out of the insn */
!   memset (rx, 0, CGEN_MAX_RX_ELEMENTS);
!   mnem_len = strlen(mnem);
!   memcpy (rx, mnem, mnem_len);
!   rx += mnem_len;
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx < rxbuf + (CGEN_MAX_RX_ELEMENTS - 9); ++syn, ++rx) 
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	 char tmp = CGEN_SYNTAX_CHAR (* syn);
! 	 switch (tmp) 
!            {
! 	     /* escape any regex metacharacters in the syntax */
! 	   case '.': case '[': case '\\': 
! 	   case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	   case '?': case '{': case '}': 
! 	   case '(': case ')': case '*':
! 	   case '|': case '+': case ']':
  #endif
  
! 	     * rx++ = '\\';
! 	     break;  
! 	   }
! 	 /* insert syntax char into rx */
! 	* rx = tmp;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  * rx = '.';
! 	  * ++rx = '*';
  	}
      }
  
--- 365,432 ----
    if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return "missing mnemonic in syntax string";
    ++syn;
  
+   /* Generate a case sensitive regular expression that emulates case
+      insensitive matching in the "C" locale.  We cannot generate a case
+      insensitive regular expression because in Turkish locales, 'i' and 'I'
+      are not equal modulo case conversion.  */
+ 
    /* copy the literal mnemonic out of the insn */
!   for (; *mnem; mnem++)
!     {
!       char c = *mnem;
! 
!       if (ISALPHA (c))
! 	{
! 	  *rx++ = '[';
! 	  *rx++ = TOLOWER (c);
! 	  *rx++ = TOUPPER (c);
! 	  *rx++ = ']';
! 	}
!       else
! 	*rx++ = c;
!     }
  
    /* copy any remaining literals from the syntax string into the rx */
!   for(; * syn != 0 && rx <= rxbuf + (CGEN_MAX_RX_ELEMENTS - 7 - 4); ++syn)
      {
        if (CGEN_SYNTAX_CHAR_P (* syn)) 
  	{
! 	  char c = CGEN_SYNTAX_CHAR (* syn);
! 	  switch (c) 
! 	    {
! 	      /* escape any regex metacharacters in the syntax */
! 	    case '.': case '[': case '\\': 
! 	    case '*': case '^': case '$': 
  
  #ifdef CGEN_ESCAPE_EXTENDED_REGEX
! 	    case '?': case '{': case '}': 
! 	    case '(': case ')': case '*':
! 	    case '|': case '+': case ']':
  #endif
  
! 	      *rx++ = '\\';
! 	      *rx++ = c;
! 	      break;
! 	    default:
! 	      if (ISALPHA (c))
! 		{
! 		  *rx++ = '[';
! 		  *rx++ = TOLOWER (c);
! 		  *rx++ = TOUPPER (c);
! 		  *rx++ = ']';
! 		}
! 	      else
! 		*rx++ = c;
! 	      break;
! 	    }
! 	  /* insert syntax char into rx */
! 	  *rx++ = c;
  	}
        else
  	{
  	  /* replace non-syntax fields with globs */
! 	  *rx++ = '.';
! 	  *rx++ = '*';
  	}
      }
  
*************** m32r_cgen_build_insn_regex (insn)
*** 416,422 ****
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB|REG_ICASE);
  
    if (reg_err == 0) 
      return NULL;
--- 442,448 ----
    * rx = '\0';
  
    CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
!   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB);
  
    if (reg_err == 0) 
      return NULL;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]