This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
[PATCH] x86: fix handling of 64-bit operand size VPCMPESTR{I,M}
- From: "Jan Beulich" <JBeulich at suse dot com>
- To: <binutils at sourceware dot org>
- Cc: "H.J. Lu" <hjl dot tools at gmail dot com>
- Date: Tue, 21 Feb 2017 03:40:53 -0700
- Subject: [PATCH] x86: fix handling of 64-bit operand size VPCMPESTR{I,M}
- Authentication-results: sourceware.org; auth=none
Just like REX.W affects operand size of the implicit rAX/rDX inputs to
PCMPESTR{I,M}, VEX.W does for VPCMPESTR{I,M}. For that to work,
build_vex_prefix() needs to honor not only internally specified REX
flags (converting them to their VEX equivalents), but also those
resulting from explicit REX* prefix uses. Also adjust
build_evex_prefix() for consistency.
Similarly the disassembler needs to be adjusted to no longer require
VEX.W to be zero for the instructions to be valid.
Note, however, that this doesn't address the problem of there being no
way to control (at least) {,E}VEX.W for 32- or 16-bit code. Nor does it
address the problem of the many WIG instructions not getting properly
disassembled when VEX.W=1.
gas/
2017-02-21 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (build_vex_prefix): Act on the merged value of
i.rex and i.prefix[REX_PREFIX] instead of just the former.
(build_evex_prefix): Likewise.
* testsuite/gas/i386/rex.s: Add VPCMPESTR{I,M}.
* testsuite/gas/i386/ilp32/rex.d: Adjust accordingly.
* testsuite/gas/i386/rex.d: Likewise.
opcodes/
2017-02-21 Jan Beulich <jbeulich@suse.com>
* i386-dis.c (VEX_W_0F3A60_P_2, VEX_W_0F3A61_P_2): Delete.
(vex_len_table): Make VPCMPESTR{I,M} entries leaf ones.
(vex_w_table): Delete VPCMPESTR{I,M} entries.
* i386-opc.tbl (pcmpestri, pcmpestrm, vpcmpestri, vpcmpestrm):
Drop VexW specification.
--- 2017-02-21/gas/config/tc-i386.c
+++ 2017-02-21/gas/config/tc-i386.c
@@ -3129,6 +3129,7 @@ build_vex_prefix (const insn_template *t
unsigned int register_specifier;
unsigned int implied_prefix;
unsigned int vector_length;
+ rex_byte rex;
/* Check register specifier. */
if (i.vex.register_specifier)
@@ -3146,7 +3147,8 @@ build_vex_prefix (const insn_template *t
&& i.operands == i.reg_operands
&& i.tm.opcode_modifier.vexopcode == VEX0F
&& i.tm.opcode_modifier.s
- && i.rex == REX_B)
+ && i.rex == REX_B
+ && !(i.prefix[REX_PREFIX] & REX_B))
{
unsigned int xchg = i.operands - 1;
union i386_op temp_op;
@@ -3193,10 +3195,12 @@ build_vex_prefix (const insn_template *t
abort ();
}
+ rex = i.rex | i.prefix[REX_PREFIX];
+
/* Use 2-byte VEX prefix if possible. */
if (i.tm.opcode_modifier.vexopcode == VEX0F
&& i.tm.opcode_modifier.vexw != VEXW1
- && (i.rex & (REX_W | REX_X | REX_B)) == 0)
+ && (rex & (REX_W | REX_X | REX_B)) == 0)
{
/* 2-byte VEX prefix. */
unsigned int r;
@@ -3205,7 +3209,7 @@ build_vex_prefix (const insn_template *t
i.vex.bytes[0] = 0xc5;
/* Check the REX.R bit. */
- r = (i.rex & REX_R) ? 0 : 1;
+ r = !(rex & REX_R);
i.vex.bytes[1] = (r << 7
| register_specifier << 3
| vector_length << 2
@@ -3250,10 +3254,10 @@ build_vex_prefix (const insn_template *t
/* The high 3 bits of the second VEX byte are 1's compliment
of RXB bits from REX. */
- i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
+ i.vex.bytes[1] = (~rex & 0x7) << 5 | m;
/* Check the REX.W bit. */
- w = (i.rex & REX_W) ? 1 : 0;
+ w = !!(rex & REX_W);
if (i.tm.opcode_modifier.vexw == VEXW1)
w = 1;
@@ -3272,7 +3276,7 @@ build_evex_prefix (void)
unsigned int register_specifier;
unsigned int implied_prefix;
unsigned int m, w;
- rex_byte vrex_used = 0;
+ rex_byte vrex_used = 0, rex = i.rex | i.prefix[REX_PREFIX];
/* Check register specifier. */
if (i.vex.register_specifier)
@@ -3341,7 +3345,7 @@ build_evex_prefix (void)
/* The high 3 bits of the second EVEX byte are 1's compliment of RXB
bits from REX. */
- i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
+ i.vex.bytes[1] = (~rex & 0x7) << 5 | m;
/* The fifth bit of the second EVEX byte is 1's compliment of the
REX_R bit in VREX. */
@@ -3368,7 +3372,7 @@ build_evex_prefix (void)
gas_assert (i.vrex == 0);
/* Check the REX.W bit. */
- w = (i.rex & REX_W) ? 1 : 0;
+ w = !!(rex & REX_W);
if (i.tm.opcode_modifier.vexw)
{
if (i.tm.opcode_modifier.vexw == VEXW1)
--- 2017-02-21/gas/testsuite/gas/i386/ilp32/rex.d
+++ 2017-02-21/gas/testsuite/gas/i386/ilp32/rex.d
@@ -20,6 +20,8 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+9b dd 30\s+fsave\s+\(%rax\)
[ ]*[0-9a-f]+:[ ]+9b 41 dd 30\s+fsave\s+\(%r8\)
[ ]*[0-9a-f]+:[ ]+40 c5 f9 28 00[ ]+rex vmovapd \(%rax\),%xmm0
+[ ]*[0-9a-f]+:[ ]+c4 e3 f9 61 c0 00[ ]+vpcmpestri \$(0x)?0,%xmm0,%xmm0
+[ ]*[0-9a-f]+:[ ]+c4 e3 f9 60 c0 00[ ]+vpcmpestrm \$(0x)?0,%xmm0,%xmm0
[ ]*[0-9a-f]+:[ ]+40[ ]+rex
[ ]*[0-9a-f]+:[ ]+41[ ]+rex.B
[ ]*[0-9a-f]+:[ ]+42[ ]+rex.X
--- 2017-02-21/gas/testsuite/gas/i386/rex.d
+++ 2017-02-21/gas/testsuite/gas/i386/rex.d
@@ -19,6 +19,8 @@ Disassembly of section .text:
[ ]*[0-9a-f]+:[ ]+9b dd 30\s+fsave\s+\(%rax\)
[ ]*[0-9a-f]+:[ ]+9b 41 dd 30\s+fsave\s+\(%r8\)
[ ]*[0-9a-f]+:[ ]+40 c5 f9 28 00[ ]+rex vmovapd \(%rax\),%xmm0
+[ ]*[0-9a-f]+:[ ]+c4 e3 f9 61 c0 00[ ]+vpcmpestri \$(0x)?0,%xmm0,%xmm0
+[ ]*[0-9a-f]+:[ ]+c4 e3 f9 60 c0 00[ ]+vpcmpestrm \$(0x)?0,%xmm0,%xmm0
[ ]*[0-9a-f]+:[ ]+40[ ]+rex
[ ]*[0-9a-f]+:[ ]+41[ ]+rex.B
[ ]*[0-9a-f]+:[ ]+42[ ]+rex.X
--- 2017-02-21/gas/testsuite/gas/i386/rex.s
+++ 2017-02-21/gas/testsuite/gas/i386/rex.s
@@ -16,6 +16,9 @@ _start:
.byte 0x40
vmovapd (%rax),%xmm0
+ rex64/vpcmpestri $0,%xmm0,%xmm0
+ rex64/vpcmpestrm $0,%xmm0,%xmm0
+
# Test prefixes family.
rex
rex.B
--- 2017-02-21/opcodes/i386-dis.c
+++ 2017-02-21/opcodes/i386-dis.c
@@ -2191,8 +2191,6 @@ enum
VEX_W_0F3A4A_P_2,
VEX_W_0F3A4B_P_2,
VEX_W_0F3A4C_P_2,
- VEX_W_0F3A60_P_2,
- VEX_W_0F3A61_P_2,
VEX_W_0F3A62_P_2,
VEX_W_0F3A63_P_2,
VEX_W_0F3ADF_P_2,
@@ -9998,12 +9996,12 @@ static const struct dis386 vex_len_table
/* VEX_LEN_0F3A60_P_2 */
{
- { VEX_W_TABLE (VEX_W_0F3A60_P_2) },
+ { "vpcmpestrm", { XM, EXx, Ib }, 0 },
},
/* VEX_LEN_0F3A61_P_2 */
{
- { VEX_W_TABLE (VEX_W_0F3A61_P_2) },
+ { "vpcmpestri", { XM, EXx, Ib }, 0 },
},
/* VEX_LEN_0F3A62_P_2 */
@@ -11346,14 +11344,6 @@ static const struct dis386 vex_w_table[]
{ "vpblendvb", { XM, Vex, EXx, XMVexI4 }, 0 },
},
{
- /* VEX_W_0F3A60_P_2 */
- { "vpcmpestrm", { XM, EXx, Ib }, 0 },
- },
- {
- /* VEX_W_0F3A61_P_2 */
- { "vpcmpestri", { XM, EXx, Ib }, 0 },
- },
- {
/* VEX_W_0F3A62_P_2 */
{ "vpcmpistrm", { XM, EXx, Ib }, 0 },
},
--- 2017-02-21/opcodes/i386-opc.tbl
+++ 2017-02-21/opcodes/i386-opc.tbl
@@ -1735,9 +1735,9 @@ roundss, 3, 0x660f3a0a, None, 3, CpuSSE4
pcmpgtq, 2, 0x6637, None, 1, CpuAVX, Modrm|Vex|VexOpcode=1|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpgtq, 2, 0x660f3837, None, 3, CpuSSE4_2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pcmpestri, 3, 0x6661, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+pcmpestri, 3, 0x6661, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpestri, 3, 0x660f3a61, None, 3, CpuSSE4_2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pcmpestrm, 3, 0x6660, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+pcmpestrm, 3, 0x6660, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpestrm, 3, 0x660f3a60, None, 3, CpuSSE4_2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpistri, 3, 0x6663, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpistri, 3, 0x660f3a63, None, 3, CpuSSE4_2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
@@ -2215,8 +2215,8 @@ vpcmpeqb, 3, 0x6674, None, 1, CpuAVX, Mo
vpcmpeqd, 3, 0x6676, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }
vpcmpeqq, 3, 0x6629, None, 1, CpuAVX, Modrm|Vex|VexOpcode=1|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }
vpcmpeqw, 3, 0x6675, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }
-vpcmpestri, 3, 0x6661, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-vpcmpestrm, 3, 0x6660, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+vpcmpestri, 3, 0x6661, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
+vpcmpestrm, 3, 0x6660, None, 1, CpuAVX, Modrm|Vex|VexOpcode=2|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
vpcmpgtb, 3, 0x6664, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }
vpcmpgtd, 3, 0x6666, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }
vpcmpgtq, 3, 0x6637, None, 1, CpuAVX, Modrm|Vex|VexOpcode=1|VexVVVV=1|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Xmmword|Unspecified|BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM, RegXMM }