This is the mail archive of the cgen@sources.redhat.com mailing list for the CGEN project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

patch: improved gas error messages


attached is a patch against cgen and opcodes which improves the error
messages gas emits when encountering a syntax error in the operand of
an overloaded instruction. the previous strategy assumed that a parse
error anywhere after the mnemonic represented a failure to match the
"form" of the instruction, so it would move on to the next overloading
and discard the possibly useful operand error message.

for example, suppose I had an instruction with two forms: 

foo bar, (operand)
foo (operand), (operand)

then a "foo bar" instruction with a syntax error in the operand will
cause the "foo bar" to be skipped as a candidate form of the foo insn,
and the second candidate will be tried. if bar is not a valid operand in
the second form, gas emits an error telling you that it cannot recognize
the insn at all, which is false: it has a perfectly good error message
about what was wrong in the first operand, it just dropped it. there
are instruction sets for which this is a problem, and cannot be fixed
by reordering the candidates.

this patch adds a regex_t to each instruction, which is built from
the instruction syntax description, such that any instruction form not
matching the regex is skipped when trying to resolve an overloading.
this has the effect that an operand syntax error will be preserved if
no other overloading candidates match the regex.

additionally, the matching strategy notices when it has at the very least
got the mnemonic right. this allows you to differentiate between 
failure-to-match-syntax and unknown-instruction-mnemonic cases.

I've built it on solaris, cygwin and linux here, and a half-dozen target
gas builds pass their tests, so it doesn't seem too disruptive.

comments?

-graydon

Index: cgen/opc-itab.scm
===================================================================
RCS file: /cvs/cvsfiles/devo/cgen/opc-itab.scm,v
retrieving revision 1.23
diff -r1.23 opc-itab.scm
636a637,721
> #ifdef CGEN_USE_REGEX
> /*
>   Regex construction routine.
> 
>   This translates an opcode syntax string into a regex string,
>   by replacing any non-character syntax element (such as an
>   opcode) with the pattern '.*'
> 
>   It then compiles the regex and stores it in the opcode, for
>   later use by @arch@_cgen_assemble_insn
> 
>   returns NULL for success, an error message for failure 
> */
> 
> static char * 
> cgen_build_insn_regex (insn)
>      CGEN_INSN *insn;
> {  
>   CGEN_OPCODE *opc = CGEN_INSN_OPCODE (insn);
>   const char *mnem = CGEN_INSN_MNEMONIC (insn);
>   int mnem_len;
>   char rxbuf[CGEN_MAX_RX_ELEMENTS];
>   char *rx = rxbuf;
>   const CGEN_SYNTAX_CHAR_TYPE *syn;
>   int reg_err;
> 
>   syn = CGEN_SYNTAX_STRING (CGEN_OPCODE_SYNTAX (opc));
> 
>   /* Mnemonics come first in the syntax string  */
>   if (! CGEN_SYNTAX_MNEMONIC_P (* syn)) return \"missing mnemonic in syntax string\";
>   ++syn;
> 
>   /* copy the literal mnemonic out of the insn */
>   memset (rx, 0, CGEN_MAX_RX_ELEMENTS);
>   mnem_len = strlen(mnem);
>   memcpy (rx, mnem, mnem_len);
>   rx += mnem_len;
> 
>   /* copy any remaining literals from the syntax string into the rx */
>   for(; * syn != 0 && rx < rxbuf + (CGEN_MAX_RX_ELEMENTS - 4); ++syn, ++rx) 
>     {
>       if (CGEN_SYNTAX_CHAR_P (* syn)) 
> 	{
> 	 char tmp = CGEN_SYNTAX_CHAR (* syn);
> 	 switch (tmp) 
>            {
> 	     /* escape any regex metacharacters in the syntax */
> 	   case '^': case '.': case '[':
> 	   case '$': case '(': case ')':
> 	   case '|': case '*': case '+':
> 	   case '?': case '{': case '\\\\':
> 	     * rx++ = '\\\\';
> 	     break;
> 	   }
> 	 /* insert syntax char into rx */
> 	* rx = tmp;
> 	}
>       else
> 	{
> 	  /* replace non-syntax fields with globs */
> 	  * rx = '.';
> 	  * ++rx = '*';
> 	}
> 	}
>       * rx++ = '$';
>       * rx = '\\0';
> 
>   CGEN_INSN_RX (insn) = xmalloc (sizeof (regex_t));
>   reg_err = regcomp ((regex_t *) CGEN_INSN_RX (insn), rxbuf, REG_NOSUB|REG_ICASE);
> 
>   if (reg_err == 0) 
>     return NULL;
>   else
>     {
>       static char msg[80];
>       regerror (reg_err, (regex_t *) CGEN_INSN_RX (insn), msg, 80);
>       regfree ((regex_t *) CGEN_INSN_RX (insn));
>       free (CGEN_INSN_RX (insn));
>       (CGEN_INSN_RX (insn)) = NULL;
>     return msg;
>     }
> }
> #endif
> 
> 
654a740,742
> #ifdef CGEN_USE_REGEX
>       cgen_build_insn_regex (& insns[i]);
> #endif
663c751,756
<     insns[i].opcode = &oc[i];
---
>     {
>       insns[i].opcode = &oc[i];
> #ifdef CGEN_USE_REGEX	   
>       cgen_build_insn_regex (& insns[i]);
> #endif
>     }
Index: cgen/ChangeLog
===================================================================
RCS file: /cvs/cvsfiles/devo/cgen/ChangeLog,v
retrieving revision 1.701
diff -r1.701 ChangeLog
0a1,5
> 2001-04-24  graydon hoare  <graydon@redhat.com>
> 
> 	* opc-itab.scm: Add cgen_build_insn_regex (insn) and
> 	calls to it to opcode initialization.
> 
Index: opcodes/cgen-asm.in
===================================================================
RCS file: /cvs/cvsfiles/devo/opcodes/cgen-asm.in,v
retrieving revision 1.34
diff -r1.34 cgen-asm.in
213a214
>   int recognized_mnemonic = 0;
228a230
>       recognized_mnemonic = 1;
244a247,253
> #ifdef HAVE_REGEXES
>       /* skip this insn if str doesn't look right lexically */
>       if (CGEN_INSN_RX (insn) != NULL &&
> 	  regexec ((regex_t *) CGEN_INSN_RX (insn), str, 0, NULL, 0) == REG_NOMATCH)
> 	continue;
> #endif
> 
271a281
> 		  recognized_mnemonic ? _("unrecognized form of instruction") :
Index: opcodes/ChangeLog
===================================================================
RCS file: /cvs/cvsfiles/devo/opcodes/ChangeLog,v
retrieving revision 1.1218
diff -r1.1218 ChangeLog
0a1,12
> 2001-04-30  graydon hoare  <graydon@redhat.com>
> 
> 	* cgen-asm.in (_cgen_assemble_insn): made explicit error message for
> 	case of recognized mnemonic but no recognized syntax form
> 
> 2001-04-24  graydon hoare  <graydon@redhat.com>
> 
> 	* cgen-asm.in (@arch@_cgen_assemble_insn): Skip opcode overloading
> 	candidate if syntax regex fails
> 
> 	* configure.in: Add regex-detecting tests.
> 
Index: opcodes/configure.in
===================================================================
RCS file: /cvs/cvsfiles/devo/opcodes/configure.in,v
retrieving revision 1.135
diff -r1.135 configure.in
89a90,95
> AC_CHECK_HEADERS(regex.h)
> AC_CHECK_FUNCS(regcomp)
> AC_CHECK_FUNCS(regexec)
> AC_CHECK_FUNCS(regerror)
> AC_CHECK_FUNCS(regfree)
> 
Index: include/opcode/cgen.h
===================================================================
RCS file: /cvs/cvsfiles/devo/include/opcode/cgen.h,v
retrieving revision 1.54
diff -r1.54 cgen.h
23a24,37
> #ifdef HAVE_REGCOMP
> #ifdef HAVE_REGEXEC
> #ifdef HAVE_REGFREE
> #ifdef HAVE_REGERROR
> #ifdef HAVE_REGEX_H
> #include <regex.h>
> #define CGEN_USE_REGEX
> #endif
> #endif
> #endif
> #endif
> #endif
> 
> 
1004a1019,1024
> 
>   /* Regex to disambiguate overloaded opcodes */
>   void *rx;
> #define CGEN_INSN_RX(insn) ((insn)->rx)
> #define CGEN_MAX_RX_ELEMENTS (CGEN_MAX_SYNTAX_ELEMENTS * 5)
> 
Index: include/opcode/ChangeLog
===================================================================
RCS file: /cvs/cvsfiles/devo/include/opcode/ChangeLog,v
retrieving revision 1.515
diff -r1.515 ChangeLog
0a1,7
> 2001-04-24  graydon hoare  <graydon@redhat.com>
> 
> 	* cgen.h: Add inclusion of regex.h, define of CGEN_USE_REGEX.
> 	(struct cgen_insn): New field void *rx.
> 	(CGEN_INSN_RX): New macro.
> 	(CGEN_MAX_RX_ELEMENTS): New macro.
> 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]