This is the mail archive of the
binutils@sources.redhat.com
mailing list for the binutils project.
Fix some SSE2/x86_64 instruction patterns
- From: Jan Hubicka <jh at suse dot cz>
- To: binutils at sources dot redhat dot com, patches at x86-64 dot org
- Date: Fri, 25 Jan 2002 20:59:07 +0100
- Subject: Fix some SSE2/x86_64 instruction patterns
Hi,
this patch fixes number of mistakes in i386.h spotted by Douglas Paulson.
Basically the x86_64 allow 16bit pushes, but does not allow 32bit pushes,
but gas refused the 16bit as well.
The in/out instructions are still only 32bit and there were some other
assorted bugs.
* sse2.s (cvtps2dq, movdq2q, movq2dq): Correct operand types.
* i386.h: Fix bugs spotted by Douglas Paulson.
(wq_Suf): New.
(push, pop patterns): Allow 'w' suffix on 64bit.
(xchq): Enable them on 64bit.
(in, out patterns): Prohibit 'q' suffix on 64bit.
(call, jmp): They do have implicit REX prefix.
(loop patterns): Allow 32bit counter in 64bit mode.
(movq): Add missing patterns.
(movmskps, pextrw, pinsrw, pmovmskb): Allow 64bit operand size.
(cvtps2dq, movdq2q, movq2dq): Correct operand ordering.
Index: gas/testsuite/gas/i386/sse2.s
===================================================================
RCS file: /cvs/src/src/gas/testsuite/gas/i386/sse2.s,v
retrieving revision 1.3
diff -c -3 -p -r1.3 sse2.s
*** sse2.s 2001/05/12 09:52:40 1.3
--- sse2.s 2002/01/24 17:53:01
*************** foo:
*** 125,131 ****
cvtdq2ps %xmm0, %xmm1
cvtpd2ps %xmm0, %xmm1
cvtps2pd %xmm0, %xmm1
! cvtps2dq %xmm0, %xmm1
cvtsd2ss %xmm0, %xmm1
cvtss2sd %xmm0, %xmm1
cvttpd2dq %xmm0, %xmm1
--- 125,131 ----
cvtdq2ps %xmm0, %xmm1
cvtpd2ps %xmm0, %xmm1
cvtps2pd %xmm0, %xmm1
! cvtps2dq %xmm0, %mm1
cvtsd2ss %xmm0, %xmm1
cvtss2sd %xmm0, %xmm1
cvttpd2dq %xmm0, %xmm1
*************** foo:
*** 135,142 ****
movdqa %xmm0, %xmm1
movdqu %xmm0, %xmm1
movdqu %xmm0, %xmm1
! movdq2q %xmm0, %mm1
! movq2dq %mm0, %xmm1
pmuludq %xmm0, %xmm1
pmuludq %xmm0, %xmm1
pshufd $1, %xmm0, %xmm1
--- 135,142 ----
movdqa %xmm0, %xmm1
movdqu %xmm0, %xmm1
movdqu %xmm0, %xmm1
! movdq2q %mm0, %xmm1
! movq2dq %xmm0, %mm1
pmuludq %xmm0, %xmm1
pmuludq %xmm0, %xmm1
pshufd $1, %xmm0, %xmm1
Index: include/opcode/i386.h
===================================================================
RCS file: /cvs/src/src/include/opcode/i386.h,v
retrieving revision 1.34
diff -c -3 -p -r1.34 i386.h
*** i386.h 2001/11/13 01:03:55 1.34
--- i386.h 2002/01/24 17:53:11
*************** static const template i386_optab[] = {
*** 62,67 ****
--- 62,68 ----
#define wl_Suf (No_bSuf|No_sSuf|No_xSuf|No_qSuf)
#define wlq_Suf (No_bSuf|No_sSuf|No_xSuf)
#define lq_Suf (No_bSuf|No_wSuf|No_sSuf|No_xSuf)
+ #define wq_Suf (No_bSuf|No_lSuf|No_sSuf|No_xSuf)
#define sl_Suf (No_bSuf|No_wSuf|No_xSuf|No_qSuf)
#define sldx_Suf (No_bSuf|No_wSuf|No_qSuf)
#define bwl_Suf (No_sSuf|No_xSuf|No_qSuf)
*************** static const template i386_optab[] = {
*** 148,159 ****
{"push", 1, 0x06, X, 0|CpuNo64, wl_Suf|Seg2ShortForm|DefaultSize, { SReg2, 0, 0 } },
{"push", 1, 0x0fa0, X, Cpu386|CpuNo64, wl_Suf|Seg3ShortForm|DefaultSize, { SReg3, 0, 0 } },
/* In 64bit mode, the operand size is implicitly 64bit. */
! {"push", 1, 0x50, X, Cpu64, q_Suf|ShortForm|DefaultSize|NoRex64, { Reg64, 0, 0 } },
! {"push", 1, 0xff, 6, Cpu64, q_Suf|Modrm|DefaultSize|NoRex64, { Reg64|WordMem, 0, 0 } },
! {"push", 1, 0x6a, X, Cpu186|Cpu64, q_Suf|DefaultSize|NoRex64, { Imm8S, 0, 0} },
! {"push", 1, 0x68, X, Cpu186|Cpu64, q_Suf|DefaultSize|NoRex64, { Imm32S, 0, 0} },
! {"push", 1, 0x06, X, Cpu64, q_Suf|Seg2ShortForm|DefaultSize|NoRex64, { SReg2, 0, 0 } },
! {"push", 1, 0x0fa0, X, Cpu386|Cpu64, q_Suf|Seg3ShortForm|DefaultSize|NoRex64, { SReg3, 0, 0 } },
{"pusha", 0, 0x60, X, Cpu186|CpuNo64, wl_Suf|DefaultSize, { 0, 0, 0 } },
--- 149,160 ----
{"push", 1, 0x06, X, 0|CpuNo64, wl_Suf|Seg2ShortForm|DefaultSize, { SReg2, 0, 0 } },
{"push", 1, 0x0fa0, X, Cpu386|CpuNo64, wl_Suf|Seg3ShortForm|DefaultSize, { SReg3, 0, 0 } },
/* In 64bit mode, the operand size is implicitly 64bit. */
! {"push", 1, 0x50, X, Cpu64, wq_Suf|ShortForm|DefaultSize|NoRex64, { Reg64, 0, 0 } },
! {"push", 1, 0xff, 6, Cpu64, wq_Suf|Modrm|DefaultSize|NoRex64, { Reg64|WordMem, 0, 0 } },
! {"push", 1, 0x6a, X, Cpu186|Cpu64, wq_Suf|DefaultSize|NoRex64, { Imm8S, 0, 0} },
! {"push", 1, 0x68, X, Cpu186|Cpu64, wq_Suf|DefaultSize|NoRex64, { Imm32S, 0, 0} },
! {"push", 1, 0x06, X, Cpu64, wq_Suf|Seg2ShortForm|DefaultSize|NoRex64, { SReg2, 0, 0 } },
! {"push", 1, 0x0fa0, X, Cpu386|Cpu64, wq_Suf|Seg3ShortForm|DefaultSize|NoRex64, { SReg3, 0, 0 } },
{"pusha", 0, 0x60, X, Cpu186|CpuNo64, wl_Suf|DefaultSize, { 0, 0, 0 } },
*************** static const template i386_optab[] = {
*** 164,173 ****
{"pop", 1, 0x07, X, CpuNo64, wl_Suf|Seg2ShortForm|DefaultSize, { SReg2, 0, 0 } },
{"pop", 1, 0x0fa1, X, Cpu386|CpuNo64, wl_Suf|Seg3ShortForm|DefaultSize, { SReg3, 0, 0 } },
/* In 64bit mode, the operand size is implicitly 64bit. */
! {"pop", 1, 0x58, X, Cpu64, q_Suf|ShortForm|DefaultSize|NoRex64, { Reg64, 0, 0 } },
! {"pop", 1, 0x8f, 0, Cpu64, q_Suf|Modrm|DefaultSize|NoRex64, { Reg64|WordMem, 0, 0 } },
! {"pop", 1, 0x07, X, Cpu64, q_Suf|Seg2ShortForm|DefaultSize|NoRex64, { SReg2, 0, 0 } },
! {"pop", 1, 0x0fa1, X, Cpu64, q_Suf|Seg3ShortForm|DefaultSize|NoRex64, { SReg3, 0, 0 } },
{"popa", 0, 0x61, X, Cpu186|CpuNo64, wl_Suf|DefaultSize, { 0, 0, 0 } },
--- 165,174 ----
{"pop", 1, 0x07, X, CpuNo64, wl_Suf|Seg2ShortForm|DefaultSize, { SReg2, 0, 0 } },
{"pop", 1, 0x0fa1, X, Cpu386|CpuNo64, wl_Suf|Seg3ShortForm|DefaultSize, { SReg3, 0, 0 } },
/* In 64bit mode, the operand size is implicitly 64bit. */
! {"pop", 1, 0x58, X, Cpu64, wq_Suf|ShortForm|DefaultSize|NoRex64, { Reg64, 0, 0 } },
! {"pop", 1, 0x8f, 0, Cpu64, wq_Suf|Modrm|DefaultSize|NoRex64, { Reg64|WordMem, 0, 0 } },
! {"pop", 1, 0x07, X, Cpu64, wq_Suf|Seg2ShortForm|DefaultSize|NoRex64, { SReg2, 0, 0 } },
! {"pop", 1, 0x0fa1, X, Cpu64, wq_Suf|Seg3ShortForm|DefaultSize|NoRex64, { SReg3, 0, 0 } },
{"popa", 0, 0x61, X, Cpu186|CpuNo64, wl_Suf|DefaultSize, { 0, 0, 0 } },
*************** static const template i386_optab[] = {
*** 176,195 ****
In the 64bit code, xchg eax, eax is reused for new nop instruction.
*/
! {"xchg", 2, 0x90, X, CpuNo64, wl_Suf|ShortForm, { WordReg, Acc, 0 } },
! {"xchg", 2, 0x90, X, CpuNo64, wl_Suf|ShortForm, { Acc, WordReg, 0 } },
{"xchg", 2, 0x86, X, 0, bwlq_Suf|W|Modrm, { Reg, Reg|AnyMem, 0 } },
{"xchg", 2, 0x86, X, 0, bwlq_Suf|W|Modrm, { Reg|AnyMem, Reg, 0 } },
/* In/out from ports. */
! {"in", 2, 0xe4, X, 0, bwlq_Suf|W, { Imm8, Acc, 0 } },
! {"in", 2, 0xec, X, 0, bwlq_Suf|W, { InOutPortReg, Acc, 0 } },
! {"in", 1, 0xe4, X, 0, bwlq_Suf|W, { Imm8, 0, 0 } },
! {"in", 1, 0xec, X, 0, bwlq_Suf|W, { InOutPortReg, 0, 0 } },
! {"out", 2, 0xe6, X, 0, bwlq_Suf|W, { Acc, Imm8, 0 } },
! {"out", 2, 0xee, X, 0, bwlq_Suf|W, { Acc, InOutPortReg, 0 } },
! {"out", 1, 0xe6, X, 0, bwlq_Suf|W, { Imm8, 0, 0 } },
! {"out", 1, 0xee, X, 0, bwlq_Suf|W, { InOutPortReg, 0, 0 } },
/* Load effective address. */
{"lea", 2, 0x8d, X, 0, wlq_Suf|Modrm, { WordMem, WordReg, 0 } },
--- 177,196 ----
In the 64bit code, xchg eax, eax is reused for new nop instruction.
*/
! {"xchg", 2, 0x90, X, 0, wlq_Suf|ShortForm, { WordReg, Acc, 0 } },
! {"xchg", 2, 0x90, X, 0, wlq_Suf|ShortForm, { Acc, WordReg, 0 } },
{"xchg", 2, 0x86, X, 0, bwlq_Suf|W|Modrm, { Reg, Reg|AnyMem, 0 } },
{"xchg", 2, 0x86, X, 0, bwlq_Suf|W|Modrm, { Reg|AnyMem, Reg, 0 } },
/* In/out from ports. */
! {"in", 2, 0xe4, X, 0, bwl_Suf|W, { Imm8, Acc, 0 } },
! {"in", 2, 0xec, X, 0, bwl_Suf|W, { InOutPortReg, Acc, 0 } },
! {"in", 1, 0xe4, X, 0, bwl_Suf|W, { Imm8, 0, 0 } },
! {"in", 1, 0xec, X, 0, bwl_Suf|W, { InOutPortReg, 0, 0 } },
! {"out", 2, 0xe6, X, 0, bwl_Suf|W, { Acc, Imm8, 0 } },
! {"out", 2, 0xee, X, 0, bwl_Suf|W, { Acc, InOutPortReg, 0 } },
! {"out", 1, 0xe6, X, 0, bwl_Suf|W, { Imm8, 0, 0 } },
! {"out", 1, 0xee, X, 0, bwl_Suf|W, { InOutPortReg, 0, 0 } },
/* Load effective address. */
{"lea", 2, 0x8d, X, 0, wlq_Suf|Modrm, { WordMem, WordReg, 0 } },
*************** static const template i386_optab[] = {
*** 210,218 ****
{"lahf", 0, 0x9f, X, CpuNo64,NoSuf, { 0, 0, 0} },
{"sahf", 0, 0x9e, X, CpuNo64,NoSuf, { 0, 0, 0} },
{"pushf", 0, 0x9c, X, CpuNo64,wlq_Suf|DefaultSize, { 0, 0, 0} },
! {"pushf", 0, 0x9c, X, Cpu64, q_Suf|DefaultSize|NoRex64,{ 0, 0, 0} },
{"popf", 0, 0x9d, X, CpuNo64,wlq_Suf|DefaultSize, { 0, 0, 0} },
! {"popf", 0, 0x9d, X, Cpu64, q_Suf|DefaultSize|NoRex64,{ 0, 0, 0} },
{"stc", 0, 0xf9, X, 0, NoSuf, { 0, 0, 0} },
{"std", 0, 0xfd, X, 0, NoSuf, { 0, 0, 0} },
{"sti", 0, 0xfb, X, 0, NoSuf, { 0, 0, 0} },
--- 211,219 ----
{"lahf", 0, 0x9f, X, CpuNo64,NoSuf, { 0, 0, 0} },
{"sahf", 0, 0x9e, X, CpuNo64,NoSuf, { 0, 0, 0} },
{"pushf", 0, 0x9c, X, CpuNo64,wlq_Suf|DefaultSize, { 0, 0, 0} },
! {"pushf", 0, 0x9c, X, Cpu64, wq_Suf|DefaultSize|NoRex64,{ 0, 0, 0} },
{"popf", 0, 0x9d, X, CpuNo64,wlq_Suf|DefaultSize, { 0, 0, 0} },
! {"popf", 0, 0x9d, X, Cpu64, wq_Suf|DefaultSize|NoRex64,{ 0, 0, 0} },
{"stc", 0, 0xf9, X, 0, NoSuf, { 0, 0, 0} },
{"std", 0, 0xfd, X, 0, NoSuf, { 0, 0, 0} },
{"sti", 0, 0xfb, X, 0, NoSuf, { 0, 0, 0} },
*************** static const template i386_optab[] = {
*** 370,376 ****
/* Control transfer instructions. */
{"call", 1, 0xe8, X, 0, wlq_Suf|JumpDword|DefaultSize, { Disp16|Disp32, 0, 0} },
! {"call", 1, 0xff, 2, 0, wlq_Suf|Modrm|DefaultSize, { WordReg|WordMem|JumpAbsolute, 0, 0} },
/* Intel Syntax */
{"call", 2, 0x9a, X, CpuNo64,wlq_Suf|JumpInterSegment|DefaultSize, { Imm16, Imm16|Imm32, 0} },
/* Intel Syntax */
--- 371,378 ----
/* Control transfer instructions. */
{"call", 1, 0xe8, X, 0, wlq_Suf|JumpDword|DefaultSize, { Disp16|Disp32, 0, 0} },
! {"call", 1, 0xff, 2, CpuNo64, wl_Suf|Modrm|DefaultSize, { WordReg|WordMem|JumpAbsolute, 0, 0} },
! {"call", 1, 0xff, 2, Cpu64, wq_Suf|Modrm|DefaultSize|NoRex64,{ WordReg|WordMem|JumpAbsolute, 0, 0} },
/* Intel Syntax */
{"call", 2, 0x9a, X, CpuNo64,wlq_Suf|JumpInterSegment|DefaultSize, { Imm16, Imm16|Imm32, 0} },
/* Intel Syntax */
*************** static const template i386_optab[] = {
*** 381,387 ****
#define JUMP_PC_RELATIVE 0xeb
{"jmp", 1, 0xeb, X, 0, NoSuf|Jump, { Disp, 0, 0} },
! {"jmp", 1, 0xff, 4, 0, wlq_Suf|Modrm, { WordReg|WordMem|JumpAbsolute, 0, 0} },
/* Intel Syntax */
{"jmp", 2, 0xea, X, CpuNo64,wl_Suf|JumpInterSegment, { Imm16, Imm16|Imm32, 0} },
/* Intel Syntax */
--- 383,390 ----
#define JUMP_PC_RELATIVE 0xeb
{"jmp", 1, 0xeb, X, 0, NoSuf|Jump, { Disp, 0, 0} },
! {"jmp", 1, 0xff, 4, CpuNo64, wl_Suf|Modrm, { WordReg|WordMem|JumpAbsolute, 0, 0} },
! {"jmp", 1, 0xff, 4, Cpu64, wq_Suf|Modrm|NoRex64, { WordReg|WordMem|JumpAbsolute, 0, 0} },
/* Intel Syntax */
{"jmp", 2, 0xea, X, CpuNo64,wl_Suf|JumpInterSegment, { Imm16, Imm16|Imm32, 0} },
/* Intel Syntax */
*************** static const template i386_optab[] = {
*** 432,449 ****
{"jg", 1, 0x7f, X, 0, NoSuf|Jump, { Disp, 0, 0} },
/* jcxz vs. jecxz is chosen on the basis of the address size prefix. */
! {"jcxz", 1, 0xe3, X, 0, NoSuf|JumpByte|Size16, { Disp, 0, 0} },
! {"jecxz", 1, 0xe3, X, 0, NoSuf|JumpByte|Size32, { Disp, 0, 0} },
/* The loop instructions also use the address size prefix to select
%cx rather than %ecx for the loop count, so the `w' form of these
instructions emit an address size prefix rather than a data size
prefix. */
! {"loop", 1, 0xe2, X, 0, wlq_Suf|JumpByte, { Disp, 0, 0} },
! {"loopz", 1, 0xe1, X, 0, wlq_Suf|JumpByte, { Disp, 0, 0} },
! {"loope", 1, 0xe1, X, 0, wlq_Suf|JumpByte, { Disp, 0, 0} },
! {"loopnz", 1, 0xe0, X, 0, wlq_Suf|JumpByte, { Disp, 0, 0} },
! {"loopne", 1, 0xe0, X, 0, wlq_Suf|JumpByte, { Disp, 0, 0} },
/* Set byte on flag instructions. */
{"seto", 1, 0x0f90, 0, Cpu386, b_Suf|Modrm, { Reg8|ByteMem, 0, 0} },
--- 435,459 ----
{"jg", 1, 0x7f, X, 0, NoSuf|Jump, { Disp, 0, 0} },
/* jcxz vs. jecxz is chosen on the basis of the address size prefix. */
! {"jcxz", 1, 0xe3, X, CpuNo64,NoSuf|JumpByte|Size16, { Disp, 0, 0} },
! {"jecxz", 1, 0xe3, X, CpuNo64,NoSuf|JumpByte|Size32, { Disp, 0, 0} },
! {"jecxz", 1, 0x67e3, X, Cpu64,NoSuf|JumpByte|Size32, { Disp, 0, 0} },
! {"jrcxz", 1, 0xe3, X, Cpu64, NoSuf|JumpByte|Size64|NoRex64, { Disp, 0, 0} },
/* The loop instructions also use the address size prefix to select
%cx rather than %ecx for the loop count, so the `w' form of these
instructions emit an address size prefix rather than a data size
prefix. */
! {"loop", 1, 0xe2, X, CpuNo64,wl_Suf|JumpByte,{ Disp, 0, 0} },
! {"loop", 1, 0xe2, X, Cpu64, lq_Suf|JumpByte|NoRex64,{ Disp, 0, 0} },
! {"loopz", 1, 0xe1, X, CpuNo64,wl_Suf|JumpByte,{ Disp, 0, 0} },
! {"loopz", 1, 0xe1, X, Cpu64, lq_Suf|JumpByte|NoRex64,{ Disp, 0, 0} },
! {"loope", 1, 0xe1, X, CpuNo64,wl_Suf|JumpByte,{ Disp, 0, 0} },
! {"loope", 1, 0xe1, X, Cpu64, lq_Suf|JumpByte|NoRex64,{ Disp, 0, 0} },
! {"loopnz", 1, 0xe0, X, CpuNo64,wl_Suf|JumpByte,{ Disp, 0, 0} },
! {"loopnz", 1, 0xe0, X, Cpu64, lq_Suf|JumpByte|NoRex64,{ Disp, 0, 0} },
! {"loopne", 1, 0xe0, X, CpuNo64,wl_Suf|JumpByte,{ Disp, 0, 0} },
! {"loopne", 1, 0xe0, X, Cpu64, lq_Suf|JumpByte|NoRex64,{ Disp, 0, 0} },
/* Set byte on flag instructions. */
{"seto", 1, 0x0f90, 0, Cpu386, b_Suf|Modrm, { Reg8|ByteMem, 0, 0} },
*************** static const template i386_optab[] = {
*** 482,491 ****
{"cmps", 2, 0xa6, X, 0, bwlq_Suf|W|IsString, { AnyMem|EsSeg, AnyMem, 0} },
{"scmp", 0, 0xa6, X, 0, bwlq_Suf|W|IsString, { 0, 0, 0} },
{"scmp", 2, 0xa6, X, 0, bwlq_Suf|W|IsString, { AnyMem|EsSeg, AnyMem, 0} },
! {"ins", 0, 0x6c, X, Cpu186, bwlq_Suf|W|IsString, { 0, 0, 0} },
! {"ins", 2, 0x6c, X, Cpu186, bwlq_Suf|W|IsString, { InOutPortReg, AnyMem|EsSeg, 0} },
! {"outs", 0, 0x6e, X, Cpu186, bwlq_Suf|W|IsString, { 0, 0, 0} },
! {"outs", 2, 0x6e, X, Cpu186, bwlq_Suf|W|IsString, { AnyMem, InOutPortReg, 0} },
{"lods", 0, 0xac, X, 0, bwlq_Suf|W|IsString, { 0, 0, 0} },
{"lods", 1, 0xac, X, 0, bwlq_Suf|W|IsString, { AnyMem, 0, 0} },
{"lods", 2, 0xac, X, 0, bwlq_Suf|W|IsString, { AnyMem, Acc, 0} },
--- 492,501 ----
{"cmps", 2, 0xa6, X, 0, bwlq_Suf|W|IsString, { AnyMem|EsSeg, AnyMem, 0} },
{"scmp", 0, 0xa6, X, 0, bwlq_Suf|W|IsString, { 0, 0, 0} },
{"scmp", 2, 0xa6, X, 0, bwlq_Suf|W|IsString, { AnyMem|EsSeg, AnyMem, 0} },
! {"ins", 0, 0x6c, X, Cpu186, bwl_Suf|W|IsString, { 0, 0, 0} },
! {"ins", 2, 0x6c, X, Cpu186, bwl_Suf|W|IsString, { InOutPortReg, AnyMem|EsSeg, 0} },
! {"outs", 0, 0x6e, X, Cpu186, bwl_Suf|W|IsString, { 0, 0, 0} },
! {"outs", 2, 0x6e, X, Cpu186, bwl_Suf|W|IsString, { AnyMem, InOutPortReg, 0} },
{"lods", 0, 0xac, X, 0, bwlq_Suf|W|IsString, { 0, 0, 0} },
{"lods", 1, 0xac, X, 0, bwlq_Suf|W|IsString, { AnyMem, 0, 0} },
{"lods", 2, 0xac, X, 0, bwlq_Suf|W|IsString, { AnyMem, Acc, 0} },
*************** static const template i386_optab[] = {
*** 971,982 ****
{"movd", 2, 0x660f6e,X,CpuSSE2,FP|Modrm, { Reg32|LLongMem, RegXMM, 0 } },
{"movd", 2, 0x660f7e,X,CpuSSE2,FP|Modrm, { RegXMM, Reg32|LLongMem, 0 } },
/* Real MMX instructions. */
{"movq", 2, 0x0f6f, X, CpuMMX, FP|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
{"movq", 2, 0x0f7f, X, CpuMMX, FP|Modrm, { RegMMX, RegMMX|LongMem, 0 } },
{"movq", 2, 0xf30f7e,X,CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"movq", 2, 0x660fd6,X,CpuSSE2,FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
- /* In the 64bit mode the short form mov immediate is redefined to have
- 64bit displacement value. */
{"movq", 2, 0x88, X, Cpu64, NoSuf|D|W|Modrm|Size64,{ Reg64, Reg64|AnyMem, 0 } },
{"movq", 2, 0xc6, 0, Cpu64, NoSuf|W|Modrm|Size64, { Imm32S, Reg64|WordMem, 0 } },
{"movq", 2, 0xb0, X, Cpu64, NoSuf|W|ShortForm|Size64,{ Imm64, Reg64, 0 } },
--- 981,996 ----
{"movd", 2, 0x660f6e,X,CpuSSE2,FP|Modrm, { Reg32|LLongMem, RegXMM, 0 } },
{"movd", 2, 0x660f7e,X,CpuSSE2,FP|Modrm, { RegXMM, Reg32|LLongMem, 0 } },
/* Real MMX instructions. */
+ {"movd", 2, 0x0f6e, X, CpuMMX, FP|Modrm, { Reg64|LLongMem, RegMMX, 0 } },
+ {"movd", 2, 0x0f7e, X, CpuMMX, FP|Modrm, { RegMMX, Reg64|LLongMem, 0 } },
+ {"movd", 2, 0x660f6e,X,CpuSSE2,FP|Modrm, { Reg64|LLongMem, RegXMM, 0 } },
+ {"movd", 2, 0x660f7e,X,CpuSSE2,FP|Modrm, { RegXMM, Reg64|LLongMem, 0 } },
+ /* In the 64bit mode the short form mov immediate is redefined to have
+ 64bit displacement value. */
{"movq", 2, 0x0f6f, X, CpuMMX, FP|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
{"movq", 2, 0x0f7f, X, CpuMMX, FP|Modrm, { RegMMX, RegMMX|LongMem, 0 } },
{"movq", 2, 0xf30f7e,X,CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"movq", 2, 0x660fd6,X,CpuSSE2,FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
{"movq", 2, 0x88, X, Cpu64, NoSuf|D|W|Modrm|Size64,{ Reg64, Reg64|AnyMem, 0 } },
{"movq", 2, 0xc6, 0, Cpu64, NoSuf|W|Modrm|Size64, { Imm32S, Reg64|WordMem, 0 } },
{"movq", 2, 0xb0, X, Cpu64, NoSuf|W|ShortForm|Size64,{ Imm64, Reg64, 0 } },
*************** static const template i386_optab[] = {
*** 1140,1146 ****
{"movlhps", 2, 0x0f16, X, CpuSSE, FP|Modrm, { RegXMM|InvMem, RegXMM, 0 } },
{"movlps", 2, 0x0f12, X, CpuSSE, FP|Modrm, { LLongMem, RegXMM, 0 } },
{"movlps", 2, 0x0f13, X, CpuSSE, FP|Modrm, { RegXMM, LLongMem, 0 } },
! {"movmskps", 2, 0x0f50, X, CpuSSE, FP|Modrm, { RegXMM|InvMem, Reg32, 0 } },
{"movntps", 2, 0x0f2b, X, CpuSSE, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movntq", 2, 0x0fe7, X, CpuSSE, FP|Modrm, { RegMMX, LLongMem, 0 } },
{"movntdq", 2, 0x660fe7, X, CpuSSE2,FP|Modrm, { RegXMM, LLongMem, 0 } },
--- 1154,1160 ----
{"movlhps", 2, 0x0f16, X, CpuSSE, FP|Modrm, { RegXMM|InvMem, RegXMM, 0 } },
{"movlps", 2, 0x0f12, X, CpuSSE, FP|Modrm, { LLongMem, RegXMM, 0 } },
{"movlps", 2, 0x0f13, X, CpuSSE, FP|Modrm, { RegXMM, LLongMem, 0 } },
! {"movmskps", 2, 0x0f50, X, CpuSSE, lq_Suf|IgnoreSize|Modrm, { RegXMM|InvMem, Reg32|Reg64, 0 } },
{"movntps", 2, 0x0f2b, X, CpuSSE, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movntq", 2, 0x0fe7, X, CpuSSE, FP|Modrm, { RegMMX, LLongMem, 0 } },
{"movntdq", 2, 0x660fe7, X, CpuSSE2,FP|Modrm, { RegXMM, LLongMem, 0 } },
*************** static const template i386_optab[] = {
*** 1155,1164 ****
{"pavgb", 2, 0x660fe0, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pavgw", 2, 0x0fe3, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pavgw", 2, 0x660fe3, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"pextrw", 3, 0x0fc5, X, CpuSSE, FP|Modrm, { Imm8, RegMMX|InvMem, Reg32 } },
! {"pextrw", 3, 0x660fc5, X, CpuSSE2,FP|Modrm, { Imm8, RegXMM|InvMem, Reg32 } },
! {"pinsrw", 3, 0x0fc4, X, CpuSSE, FP|Modrm, { Imm8, Reg32|ShortMem, RegMMX } },
! {"pinsrw", 3, 0x660fc4, X, CpuSSE2, FP|Modrm, { Imm8, Reg32|ShortMem, RegXMM } },
{"pmaxsw", 2, 0x0fee, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pmaxsw", 2, 0x660fee, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pmaxub", 2, 0x0fde, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
--- 1169,1178 ----
{"pavgb", 2, 0x660fe0, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pavgw", 2, 0x0fe3, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pavgw", 2, 0x660fe3, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"pextrw", 3, 0x0fc5, X, CpuSSE, lq_Suf|IgnoreSize|Modrm, { Imm8, RegMMX|InvMem, Reg32|Reg64 } },
! {"pextrw", 3, 0x660fc5, X, CpuSSE2,lq_Suf|IgnoreSize|Modrm, { Imm8, RegXMM|InvMem, Reg32|Reg64 } },
! {"pinsrw", 3, 0x0fc4, X, CpuSSE, lq_Suf|IgnoreSize|Modrm, { Imm8, Reg32|Reg64|ShortMem, RegMMX } },
! {"pinsrw", 3, 0x660fc4, X, CpuSSE2, lq_Suf|IgnoreSize|Modrm, { Imm8, Reg32|Reg64|ShortMem, RegXMM } },
{"pmaxsw", 2, 0x0fee, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pmaxsw", 2, 0x660fee, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pmaxub", 2, 0x0fde, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
*************** static const template i386_optab[] = {
*** 1167,1174 ****
{"pminsw", 2, 0x660fea, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pminub", 2, 0x0fda, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pminub", 2, 0x660fda, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"pmovmskb", 2, 0x0fd7, X, CpuSSE, FP|Modrm, { RegMMX|InvMem, Reg32, 0 } },
! {"pmovmskb", 2, 0x660fd7, X, CpuSSE2,FP|Modrm, { RegXMM|InvMem, Reg32, 0 } },
{"pmulhuw", 2, 0x0fe4, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pmulhuw", 2, 0x660fe4, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"prefetchnta", 1, 0x0f18, 0, CpuSSE, FP|Modrm, { LLongMem, 0, 0 } },
--- 1181,1188 ----
{"pminsw", 2, 0x660fea, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"pminub", 2, 0x0fda, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pminub", 2, 0x660fda, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"pmovmskb", 2, 0x0fd7, X, CpuSSE, lq_Suf|IgnoreSize|Modrm, { RegMMX|InvMem, Reg32|Reg64, 0 } },
! {"pmovmskb", 2, 0x660fd7, X, CpuSSE2,lq_Suf|IgnoreSize|Modrm, { RegXMM|InvMem, Reg32|Reg64, 0 } },
{"pmulhuw", 2, 0x0fe4, X, CpuSSE, FP|Modrm, { RegMMX|LLongMem, RegMMX, 0 } },
{"pmulhuw", 2, 0x660fe4, X, CpuSSE2,FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"prefetchnta", 1, 0x0f18, 0, CpuSSE, FP|Modrm, { LLongMem, 0, 0 } },
*************** static const template i386_optab[] = {
*** 1233,1239 ****
{"movhpd", 2, 0x660f17, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movlpd", 2, 0x660f12, X, CpuSSE2, FP|Modrm, { LLongMem, RegXMM, 0 } },
{"movlpd", 2, 0x660f13, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
! {"movmskpd", 2, 0x660f50, X, CpuSSE2, FP|Modrm, { RegXMM|InvMem, Reg32, 0 } },
{"movntpd", 2, 0x660f2b, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movsd", 2, 0xf20f10, X, CpuSSE2, FP|Modrm, { RegXMM|LongMem, RegXMM, 0 } },
{"movsd", 2, 0xf20f11, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LongMem, 0 } },
--- 1247,1253 ----
{"movhpd", 2, 0x660f17, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movlpd", 2, 0x660f12, X, CpuSSE2, FP|Modrm, { LLongMem, RegXMM, 0 } },
{"movlpd", 2, 0x660f13, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
! {"movmskpd", 2, 0x660f50, X, CpuSSE2, lq_Suf|IgnoreSize|Modrm, { RegXMM|InvMem, Reg32|Reg64, 0 } },
{"movntpd", 2, 0x660f2b, X, CpuSSE2, FP|Modrm, { RegXMM, LLongMem, 0 } },
{"movsd", 2, 0xf20f10, X, CpuSSE2, FP|Modrm, { RegXMM|LongMem, RegXMM, 0 } },
{"movsd", 2, 0xf20f11, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LongMem, 0 } },
*************** static const template i386_optab[] = {
*** 1257,1263 ****
{"cvtpd2pi", 2, 0x660f2d, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegMMX, 0 } },
{"cvtpd2ps", 2, 0x660f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"cvtps2pd", 2, 0x0f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"cvtps2dq", 2, 0x660f5b, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"cvtsd2si", 2, 0xf20f2d, X, CpuSSE2, lq_Suf|IgnoreSize|Modrm,{ RegXMM|LLongMem, Reg32|Reg64, 0 } },
{"cvtsd2ss", 2, 0xf20f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"cvtss2sd", 2, 0xf30f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
--- 1271,1277 ----
{"cvtpd2pi", 2, 0x660f2d, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegMMX, 0 } },
{"cvtpd2ps", 2, 0x660f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"cvtps2pd", 2, 0x0f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
! {"cvtps2dq", 2, 0x660f5b, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegMMX, 0 } },
{"cvtsd2si", 2, 0xf20f2d, X, CpuSSE2, lq_Suf|IgnoreSize|Modrm,{ RegXMM|LLongMem, Reg32|Reg64, 0 } },
{"cvtsd2ss", 2, 0xf20f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"cvtss2sd", 2, 0xf30f5a, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
*************** static const template i386_optab[] = {
*** 1270,1277 ****
{"movdqa", 2, 0x660f7f, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
{"movdqu", 2, 0xf30f6f, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"movdqu", 2, 0xf30f7f, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
! {"movdq2q", 2, 0xf20fd6, X, CpuSSE2, FP|Modrm, { RegXMM|InvMem, RegMMX, 0 } },
! {"movq2dq", 2, 0xf30fd6, X, CpuSSE2, FP|Modrm, { RegMMX|InvMem, RegXMM, 0 } },
{"pmuludq", 2, 0x0ff4, X, CpuSSE2, FP|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
{"pmuludq", 2, 0x660ff4, X, CpuSSE2, FP|Modrm, { RegXMM|LongMem, RegXMM, 0 } },
{"pshufd", 3, 0x660f70, X, CpuSSE2, FP|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } },
--- 1284,1291 ----
{"movdqa", 2, 0x660f7f, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
{"movdqu", 2, 0xf30f6f, X, CpuSSE2, FP|Modrm, { RegXMM|LLongMem, RegXMM, 0 } },
{"movdqu", 2, 0xf30f7f, X, CpuSSE2, FP|Modrm, { RegXMM, RegXMM|LLongMem, 0 } },
! {"movdq2q", 2, 0xf20fd6, X, CpuSSE2, FP|Modrm, { RegMMX|InvMem, RegXMM, 0 } },
! {"movq2dq", 2, 0xf30fd6, X, CpuSSE2, FP|Modrm, { RegXMM|InvMem, RegMMX, 0 } },
{"pmuludq", 2, 0x0ff4, X, CpuSSE2, FP|Modrm, { RegMMX|LongMem, RegMMX, 0 } },
{"pmuludq", 2, 0x660ff4, X, CpuSSE2, FP|Modrm, { RegXMM|LongMem, RegXMM, 0 } },
{"pshufd", 3, 0x660f70, X, CpuSSE2, FP|Modrm, { Imm8, RegXMM|LLongMem, RegXMM } },