This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
PATCH: Improve implicit xmm0 handling
- From: "H.J. Lu" <hjl at lucon dot org>
- To: binutils at sources dot redhat dot com
- Date: Fri, 12 Oct 2007 14:38:24 -0700
- Subject: PATCH: Improve implicit xmm0 handling
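I am checking in this patch to improve the handling of the implicit xmm0 operand: blendvpd, blendvps and pblendvb now use a dedicated FirstXmm0 opcode modifier instead of overloading RegKludge.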
H.J.
---
gas/
2007-10-12 H.J. Lu <hongjiu.lu@intel.com>
* config/tc-i386.c (process_operands): Check the firstxmm0
field in opcode_modifier for instructions with an implicit
xmm0 as the first operand.
opcodes/
2007-10-12 H.J. Lu <hongjiu.lu@intel.com>
* i386-gen.c (opcode_modifiers): Add FirstXmm0.
* i386-opc.h (FirstXmm0): New.
(IsPrefix): Updated.
(i386_opcode_modifier): Add firstxmm0.
* i386-opc.tbl (blendvpd): Replace RegKludge with FirstXmm0.
(blendvps): Likewise.
(pblendvb): Likewise.
* i386-tbl.h: Regenerated.
--- binutils/gas/config/tc-i386.c.xmm0 2007-10-05 11:37:30.000000000 -0700
+++ binutils/gas/config/tc-i386.c 2007-10-12 14:30:55.000000000 -0700
@@ -4230,57 +4230,52 @@ process_operands (void)
|| i.tm.opcode_modifier.drexc)
process_drex ();
- /* The imul $imm, %reg instruction is converted into
- imul $imm, %reg, %reg, and the clr %reg instruction
- is converted into xor %reg, %reg. */
- if (i.tm.opcode_modifier.regkludge)
- {
- if (i.tm.cpu_flags.bitfield.cpusse4_1)
- {
- /* The first operand in instruction blendvpd, blendvps and
- pblendvb in SSE4.1 is implicit and must be xmm0. */
- assert (i.operands == 3
- && i.reg_operands >= 2
- && UINTS_EQUAL (i.types[0], regxmm));
- if (i.op[0].regs->reg_num != 0)
- {
- if (intel_syntax)
- as_bad (_("the last operand of `%s' must be `%sxmm0'"),
- i.tm.name, register_prefix);
- else
- as_bad (_("the first operand of `%s' must be `%sxmm0'"),
- i.tm.name, register_prefix);
- return 0;
- }
- i.op[0] = i.op[1];
- i.op[1] = i.op[2];
- i.types[0] = i.types[1];
- i.types[1] = i.types[2];
- i.operands--;
- i.reg_operands--;
-
- /* We need to adjust fields in i.tm since they are used by
- build_modrm_byte. */
- i.tm.operand_types [0] = i.tm.operand_types [1];
- i.tm.operand_types [1] = i.tm.operand_types [2];
- i.tm.operands--;
- }
- else
- {
- unsigned int first_reg_op;
-
- if (operand_type_check (i.types[0], reg))
- first_reg_op = 0;
- else
- first_reg_op = 1;
- /* Pretend we saw the extra register operand. */
- assert (i.reg_operands == 1
- && i.op[first_reg_op + 1].regs == 0);
- i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
- i.types[first_reg_op + 1] = i.types[first_reg_op];
- i.operands++;
- i.reg_operands++;
- }
+ if (i.tm.opcode_modifier.firstxmm0)
+ {
+ /* The first operand is implicit and must be xmm0. */
+ assert (i.reg_operands && UINTS_EQUAL (i.types[0], regxmm));
+ if (i.op[0].regs->reg_num != 0)
+ {
+ if (intel_syntax)
+ as_bad (_("the last operand of `%s' must be `%sxmm0'"),
+ i.tm.name, register_prefix);
+ else
+ as_bad (_("the first operand of `%s' must be `%sxmm0'"),
+ i.tm.name, register_prefix);
+ return 0;
+ }
+ i.op[0] = i.op[1];
+ i.op[1] = i.op[2];
+ i.types[0] = i.types[1];
+ i.types[1] = i.types[2];
+ i.operands--;
+ i.reg_operands--;
+
+ /* We need to adjust fields in i.tm since they are used by
+ build_modrm_byte. */
+ i.tm.operand_types [0] = i.tm.operand_types [1];
+ i.tm.operand_types [1] = i.tm.operand_types [2];
+ i.tm.operands--;
+ }
+ else if (i.tm.opcode_modifier.regkludge)
+ {
+ /* The imul $imm, %reg instruction is converted into
+ imul $imm, %reg, %reg, and the clr %reg instruction
+ is converted into xor %reg, %reg. */
+
+ unsigned int first_reg_op;
+
+ if (operand_type_check (i.types[0], reg))
+ first_reg_op = 0;
+ else
+ first_reg_op = 1;
+ /* Pretend we saw the extra register operand. */
+ assert (i.reg_operands == 1
+ && i.op[first_reg_op + 1].regs == 0);
+ i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
+ i.types[first_reg_op + 1] = i.types[first_reg_op];
+ i.operands++;
+ i.reg_operands++;
}
if (i.tm.opcode_modifier.shortform)
--- binutils/opcodes/i386-gen.c.xmm0 2007-10-08 11:12:08.000000000 -0700
+++ binutils/opcodes/i386-gen.c 2007-10-12 14:15:52.000000000 -0700
@@ -276,6 +276,7 @@ static bitfield opcode_modifiers[] =
BITFIELD (FWait),
BITFIELD (IsString),
BITFIELD (RegKludge),
+ BITFIELD (FirstXmm0),
BITFIELD (IsPrefix),
BITFIELD (ImmExt),
BITFIELD (NoRex64),
--- binutils/opcodes/i386-opc.h.xmm0 2007-10-08 11:12:08.000000000 -0700
+++ binutils/opcodes/i386-opc.h 2007-10-12 14:16:17.000000000 -0700
@@ -195,8 +195,10 @@ typedef union i386_cpu_flags
/* fake an extra reg operand for clr, imul and special register
processing for some instructions. */
#define RegKludge (IsString + 1)
+/* The first operand must be xmm0 */
+#define FirstXmm0 (RegKludge + 1)
/* opcode is a prefix */
-#define IsPrefix (RegKludge + 1)
+#define IsPrefix (FirstXmm0 + 1)
/* instruction has extension in 8 bit imm */
#define ImmExt (IsPrefix + 1)
/* instruction don't need Rex64 prefix. */
@@ -240,6 +242,7 @@ typedef struct i386_opcode_modifier
unsigned int fwait:1;
unsigned int isstring:1;
unsigned int regkludge:1;
+ unsigned int firstxmm0:1;
unsigned int isprefix:1;
unsigned int immext:1;
unsigned int norex64:1;
--- binutils/opcodes/i386-opc.tbl.xmm0 2007-10-08 11:12:08.000000000 -0700
+++ binutils/opcodes/i386-opc.tbl 2007-10-12 14:17:40.000000000 -0700
@@ -1365,9 +1365,9 @@ pabsd, 2, 0x660f381e, None, 3, CpuSSSE3,
blendpd, 3, 0x660f3a0d, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
blendps, 3, 0x660f3a0c, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-blendvpd, 3, 0x660f3815, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|RegKludge, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+blendvpd, 3, 0x660f3815, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|FirstXmm0, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
blendvpd, 2, 0x660f3815, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-blendvps, 3, 0x660f3814, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|RegKludge, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+blendvps, 3, 0x660f3814, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|FirstXmm0, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
blendvps, 2, 0x660f3814, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
dppd, 3, 0x660f3a41, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
dpps, 3, 0x660f3a40, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
@@ -1376,7 +1376,7 @@ insertps, 3, 0x660f3a21, None, 3, CpuSSE
movntdqa, 2, 0x660f382a, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
mpsadbw, 3, 0x660f3a42, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
packusdw, 2, 0x660f382b, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pblendvb, 3, 0x660f3810, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|RegKludge, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+pblendvb, 3, 0x660f3810, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf|FirstXmm0, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pblendvb, 2, 0x660f3810, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pblendw, 3, 0x660f3a0e, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpeqq, 2, 0x660f3829, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_xSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }