This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

New VFP architecture support


The attached patch adds support for two new VFP architecture variants: firstly,
single-precision VFPv3 (aka VFPv3xD), and secondly, VFPv4, which adds fused
multiply-add instructions to VFPv3.

Tested on arm-none-eabi.
Applied to CVS head.

Paul

2009-10-29  Paul Brook  <paul@codesourcery.com>

	ld/testsuite/
	* ld-arm/arm-elf.exp: Add new attr-merge-vfp tests.
	* ld-arm/attr-merge-vfp-1.d: New test.
	* ld-arm/attr-merge-vfp-1r.d: New test.
	* ld-arm/attr-merge-vfp-2.d: New test.
	* ld-arm/attr-merge-vfp-2r.d: New test.
	* ld-arm/attr-merge-vfp-3.d: New test.
	* ld-arm/attr-merge-vfp-3r.d: New test.
	* ld-arm/attr-merge-vfp-4.d: New test.
	* ld-arm/attr-merge-vfp-4r.d: New test.
	* ld-arm/attr-merge-vfp-5.d: New test.
	* ld-arm/attr-merge-vfp-5r.d: New test.
	* ld-arm/attr-merge-vfp-2.s: New test.
	* ld-arm/attr-merge-vfp-3.s: New test.
	* ld-arm/attr-merge-vfp-3-d16.s: New test.
	* ld-arm/attr-merge-vfp-4.s: New test.
	* ld-arm/attr-merge-vfp-4-d16.s: New test.

	gas/
	* doc/c-arm.texi: Document new -mfpu options.
	* config/tc-arm.c (fpu_vfp_ext_v3xd, fpu_vfp_fp16, fpu_neon_ext_fma,
	fpu_vfp_ext_fma): New.
	(NEON_ENC_TAB): Add vfma, vfms, vfnma and vfnms.
	(do_vfp_nsyn_fma_fms, do_neon_fmac): New functions.
	(insns): Move double precision load/store.  Split out double
	precision VFPv3 instructions.  Add VFPv4 instructions.
	(arm_fpus): Add VFPv3-FP16, VFPv3xD and VFPv4 variants.
	(aeabi_set_public_attributes): Set VFPv4 variants.

	gas/testsuite/
	* gas/arm/attr-mfpu-vfpv4.d: New test.
	* gas/arm/attr-mfpu-vfpv4-d16.d: New test.
	* gas/arm/neon-fma-cov.d: New test.
	* gas/arm/neon-fma-cov.s: New test.
	* gas/arm/vfp-fma-inc.s: New test.
	* gas/arm/vfp-fma-arm.d: New test.
	* gas/arm/vfp-fma-arm.s: New test.
	* gas/arm/vfp-fma-thumb.d: New test.
	* gas/arm/vfp-fma-thumb.s: New test.
	* gas/arm/vfma1.d: New test.
	* gas/arm/vfma1.s: New test.
	* gas/arm/vfpv3xd.d: New test.
	* gas/arm/vfpv3xd.s: New test.

	include/opcode/
	* arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA,
	FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define.
	(FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD,
	FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16,
	FPU_ARCH_NEON_VFP_V4): Define.

	binutils/
	* readelf.c (arm_attr_tag_VFP_arch): Add VFPv4 and VFPv4-D16.

	bfd/
	* elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4
	attributes.

	opcodes/
	* arm-dis.c (coprocessor_opcodes): Update to use new feature flags.
	Add VFPv4 instructions.
Index: ld/testsuite/ld-arm/attr-merge-vfp-2r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2r.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2r.d	(revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-3.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3
Index: ld/testsuite/ld-arm/attr-merge-vfp-3r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3r.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3r.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-4r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4r.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4r.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-3.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-1.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-1.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-1.d	(revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-2.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3-D16
Index: ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s	(revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv3-d16
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-2.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2.d	(revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3
Index: ld/testsuite/ld-arm/attr-merge-vfp-2.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2.s	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2.s	(revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv2
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-5r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-5r.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-5r.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-4.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-3.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-3.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3.s	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3.s	(revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv3
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-4.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-3.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s	(revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv4-d16
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-1r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-1r.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-1r.d	(revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-2.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3-D16
Index: ld/testsuite/ld-arm/attr-merge-vfp-4.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4.s	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4.s	(revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv4
+
Index: ld/testsuite/ld-arm/arm-elf.exp
===================================================================
--- ld/testsuite/ld-arm/arm-elf.exp	(revision 267253)
+++ ld/testsuite/ld-arm/arm-elf.exp	(working copy)
@@ -450,3 +450,13 @@ run_dump_test "unwind-2"
 run_dump_test "unwind-3"
 run_dump_test "unwind-4"
 run_dump_test "unwind-5"
+run_dump_test "attr-merge-vfp-1"
+run_dump_test "attr-merge-vfp-1r"
+run_dump_test "attr-merge-vfp-2"
+run_dump_test "attr-merge-vfp-2r"
+run_dump_test "attr-merge-vfp-3"
+run_dump_test "attr-merge-vfp-3r"
+run_dump_test "attr-merge-vfp-4"
+run_dump_test "attr-merge-vfp-4r"
+run_dump_test "attr-merge-vfp-5"
+run_dump_test "attr-merge-vfp-5r"
Index: ld/testsuite/ld-arm/attr-merge-vfp-5.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-5.d	(revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-5.d	(revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: gas/doc/c-arm.texi
===================================================================
--- gas/doc/c-arm.texi	(revision 267253)
+++ gas/doc/c-arm.texi	(working copy)
@@ -200,15 +200,22 @@ The following format options are recogni
 @code{vfp10-r0},
 @code{vfp9},
 @code{vfpxd},
-@code{vfpv2}
-@code{vfpv3}
-@code{vfpv3-d16}
+@code{vfpv2},
+@code{vfpv3},
+@code{vfpv3-fp16},
+@code{vfpv3-d16},
+@code{vfpv3-d16-fp16},
+@code{vfpv3xd},
+@code{vfpv3xd-fp16},
+@code{vfpv4},
+@code{vfpv4-d16},
 @code{arm1020t},
 @code{arm1020e},
 @code{arm1136jf-s},
-@code{maverick}
+@code{maverick},
+@code{neon},
 and
-@code{neon}.
+@code{neon-vfpv4}.
 
 In addition to determining which instructions are assembled, this option
 also affects the way in which the @code{.double} assembler directive behaves
Index: gas/testsuite/gas/arm/attr-mfpu-vfpv4.d
===================================================================
--- gas/testsuite/gas/arm/attr-mfpu-vfpv4.d	(revision 0)
+++ gas/testsuite/gas/arm/attr-mfpu-vfpv4.d	(revision 0)
@@ -0,0 +1,13 @@
+# name: attributes for -mfpu=vfpv4
+# source: blank.s
+# as: -mfpu=vfpv4
+# readelf: -A
+# This test is only valid on EABI based ports.
+# target: *-*-*eabi
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: gas/testsuite/gas/arm/neon-fma-cov.d
===================================================================
--- gas/testsuite/gas/arm/neon-fma-cov.d	(revision 0)
+++ gas/testsuite/gas/arm/neon-fma-cov.d	(revision 0)
@@ -0,0 +1,13 @@
+# name: Neon FMA instruction coverage
+# as: -mfpu=neon-vfpv4
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f2000c50 	vfma\.f32	q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000c50 	vfma\.f32	q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000c10 	vfma\.f32	d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200c50 	vfms\.f32	q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200c50 	vfms\.f32	q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200c10 	vfms\.f32	d0, d0, d0
Index: gas/testsuite/gas/arm/neon-fma-cov.s
===================================================================
--- gas/testsuite/gas/arm/neon-fma-cov.s	(revision 0)
+++ gas/testsuite/gas/arm/neon-fma-cov.s	(revision 0)
@@ -0,0 +1,12 @@
+	.arm
+	.syntax unified
+	.text
+
+	.macro regs3_1 op opq vtype
+	\op\vtype q0,q0,q0
+	\opq\vtype q0,q0,q0
+	\op\vtype d0,d0,d0
+	.endm
+
+	regs3_1 vfma vfma .f32
+	regs3_1 vfms vfms .f32
Index: gas/testsuite/gas/arm/vfp-fma-arm.d
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-arm.d	(revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-arm.d	(revision 0)
@@ -0,0 +1,23 @@
+# name: FMA instructions, ARM mode
+# as: -mfpu=vfpv4 -I$srcdir/$subdir
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> eea00a81 	vfma\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea10b02 	vfma\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0ea00a81 	vfmaeq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0ea10b02 	vfmaeq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> eea00ac1 	vfms\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea10b42 	vfms\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0ea00ac1 	vfmseq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0ea10b42 	vfmseq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee900ac1 	vfnma\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee910b42 	vfnma\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0e900ac1 	vfnmaeq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0e910b42 	vfnmaeq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee900a81 	vfnms\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee910b02 	vfnms\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0e900a81 	vfnmseq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0e910b02 	vfnmseq\.f64	d0, d1, d2
Index: gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d
===================================================================
--- gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d	(revision 0)
+++ gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d	(revision 0)
@@ -0,0 +1,13 @@
+# name: attributes for -mfpu=vfpv4-d16
+# source: blank.s
+# as: -mfpu=vfpv4-d16
+# readelf: -A
+# This test is only valid on EABI based ports.
+# target: *-*-*eabi
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: gas/testsuite/gas/arm/vfp-fma-inc.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-inc.s	(revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-inc.s	(revision 0)
@@ -0,0 +1,20 @@
+	.syntax unified
+
+	.include "itblock.s"
+
+func:
+	.macro dyadic op cond="" f32=".f32" f64=".f64"
+	itblock 2 \cond
+	\op\cond\f32 s0,s1,s2
+	\op\cond\f64 d0,d1,d2
+	.endm
+
+	.macro dyadic_c op
+	dyadic \op
+	dyadic \op eq
+	.endm
+
+	dyadic_c vfma
+	dyadic_c vfms
+	dyadic_c vfnma
+	dyadic_c vfnms
Index: gas/testsuite/gas/arm/vfp-fma-arm.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-arm.s	(revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-arm.s	(revision 0)
@@ -0,0 +1,2 @@
+	.arm
+	.include "vfp-fma-inc.s"
Index: gas/testsuite/gas/arm/vfma1.d
===================================================================
--- gas/testsuite/gas/arm/vfma1.d	(revision 0)
+++ gas/testsuite/gas/arm/vfma1.d	(revision 0)
@@ -0,0 +1,34 @@
+#objdump: -dr --prefix-addresses --show-raw-insn
+#name: VFMA decoding
+#as: -mcpu=arm7m
+
+# Test VFMA instruction disassembly
+
+.*: *file format .*arm.*
+
+
+Disassembly of section .text:
+00000000 <[^>]*> ee000a00 	vmla.f32	s0, s0, s0
+00000004 <[^>]*> ee000b00 	vmla.f64	d0, d0, d0
+00000008 <[^>]*> f2000d10 	vmla.f32	d0, d0, d0
+0000000c <[^>]*> f2000d50 	vmla.f32	q0, q0, q0
+00000010 <[^>]*> eea00a00 	vfma.f32	s0, s0, s0
+00000014 <[^>]*> eea00b00 	vfma.f64	d0, d0, d0
+00000018 <[^>]*> f2000c10 	vfma.f32	d0, d0, d0
+0000001c <[^>]*> f2000c50 	vfma.f32	q0, q0, q0
+00000020 <[^>]*> ee000a40 	vmls.f32	s0, s0, s0
+00000024 <[^>]*> ee000b40 	vmls.f64	d0, d0, d0
+00000028 <[^>]*> f2200d10 	vmls.f32	d0, d0, d0
+0000002c <[^>]*> f2200d50 	vmls.f32	q0, q0, q0
+00000030 <[^>]*> eea00a40 	vfms.f32	s0, s0, s0
+00000034 <[^>]*> eea00b40 	vfms.f64	d0, d0, d0
+00000038 <[^>]*> f2200c10 	vfms.f32	d0, d0, d0
+0000003c <[^>]*> f2200c50 	vfms.f32	q0, q0, q0
+00000040 <[^>]*> ee100a40 	vnmla.f32	s0, s0, s0
+00000044 <[^>]*> ee100b40 	vnmla.f64	d0, d0, d0
+00000048 <[^>]*> ee900a40 	vfnma.f32	s0, s0, s0
+0000004c <[^>]*> ee900b40 	vfnma.f64	d0, d0, d0
+00000050 <[^>]*> ee100a00 	vnmls.f32	s0, s0, s0
+00000054 <[^>]*> ee100b00 	vnmls.f64	d0, d0, d0
+00000058 <[^>]*> ee900a00 	vfnms.f32	s0, s0, s0
+0000005c <[^>]*> ee900b00 	vfnms.f64	d0, d0, d0
Index: gas/testsuite/gas/arm/vfma1.s
===================================================================
--- gas/testsuite/gas/arm/vfma1.s	(revision 0)
+++ gas/testsuite/gas/arm/vfma1.s	(revision 0)
@@ -0,0 +1,43 @@
+
+	.eabi_attribute Tag_Advanced_SIMD_arch, 2
+	.eabi_attribute Tag_VFP_arch, 6
+	
+	@VMLA
+	.inst 0xee000a00 @ VFP  vmla.f32 s0,s0,s0
+	.inst 0xee000b00 @ VFP  vmla.f64 d0,d0,d0
+	.inst 0xf2000d10 @ NEON vmla.f32 d0,d0,d0
+	.inst 0xf2000d50 @ NEON vmla.f32 q0,q0,q0
+
+	@VFMA new
+	.inst 0xeea00a00 @ VFP  vfma.f32 s0,s0,s0
+	.inst 0xeea00b00 @ VFP  vfma.f64 d0,d0,d0
+	.inst 0xf2000c10 @ NEON vfma.f32 d0,d0,d0
+	.inst 0xf2000c50 @ NEON vfma.f32 q0,q0,q0
+
+	@VMLS
+	.inst 0xee000a40 @ VFP  vmls.F32 s0,s0,s0
+	.inst 0xee000b40 @ VFP  vmls.F64 d0,d0,d0
+	.inst 0xf2200d10 @ NEON vmls.F32 d0,d0,d0
+	.inst 0xf2200d50 @ NEON vmls.F32 q0,q0,q0
+
+	@VFMS new
+	.inst 0xeea00a40 @ VFP  vfms.F32 s0,s0,s0
+	.inst 0xeea00b40 @ VFP  vfms.F64 d0,d0,d0
+	.inst 0xf2200c10 @ NEON vfms.F32 d0,d0,d0
+	.inst 0xf2200c50 @ NEON vfms.F32 q0,q0,q0
+
+	@VNMLA
+	.inst 0xee100a40 @ VFP  vnmla.F32 s0,s0,s0
+	.inst 0xee100b40 @ VFP  vnmla.F64 d0,d0,d0
+
+	@VFNMA new
+	.inst 0xee900a40 @ VFP  vfnma.F32 s0,s0,s0
+	.inst 0xee900b40 @ VFP  vfnma.F64 d0,d0,d0
+
+	@VNMLS
+	.inst 0xee100a00 @ VFP  vnmls.F32 s0,s0,s0
+	.inst 0xee100b00 @ VFP  vnmls.F64 d0,d0,d0
+
+	@VFNMS new
+	.inst 0xee900a00 @ VFP  vfnms.F32 s0,s0,s0
+	.inst 0xee900b00 @ VFP  vfnms.F64 d0,d0,d0
Index: gas/testsuite/gas/arm/vfpv3xd.d
===================================================================
--- gas/testsuite/gas/arm/vfpv3xd.d	(revision 0)
+++ gas/testsuite/gas/arm/vfpv3xd.d	(revision 0)
@@ -0,0 +1,23 @@
+#objdump: -dr --prefix-addresses --show-raw-insn
+#name: VFP Double-precision load/store
+#as: -mfpu=vfpv3xd
+
+# Test the ARM VFP Double Precision load/store on single precision FPU
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0+[0-9a-f]* <[^>]*> ed900b00 	vldr	d0, \[r0\]
+0+[0-9a-f]* <[^>]*> ed800b00 	vstr	d0, \[r0\]
+0+[0-9a-f]* <[^>]*> ec900b02 	vldmia	r0, {d0}
+0+[0-9a-f]* <[^>]*> ec900b02 	vldmia	r0, {d0}
+0+[0-9a-f]* <[^>]*> ecb00b02 	vldmia	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ecb00b02 	vldmia	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed300b02 	vldmdb	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed300b02 	vldmdb	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ec800b02 	vstmia	r0, {d0}
+0+[0-9a-f]* <[^>]*> ec800b02 	vstmia	r0, {d0}
+0+[0-9a-f]* <[^>]*> eca00b02 	vstmia	r0!, {d0}
+0+[0-9a-f]* <[^>]*> eca00b02 	vstmia	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed200b02 	vstmdb	r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed200b02 	vstmdb	r0!, {d0}
Index: gas/testsuite/gas/arm/vfpv3xd.s
===================================================================
--- gas/testsuite/gas/arm/vfpv3xd.s	(revision 0)
+++ gas/testsuite/gas/arm/vfpv3xd.s	(revision 0)
@@ -0,0 +1,19 @@
+# Check double precision load/store are allowed on single precision
+# implementation
+
+	fldd	d0, [r0]
+	fstd	d0, [r0]
+
+	fldmiad	r0, {d0}
+	fldmfdd	r0, {d0}
+	fldmiad	r0!, {d0}
+	fldmfdd	r0!, {d0}
+	fldmdbd	r0!, {d0}
+	fldmead	r0!, {d0}
+
+	fstmiad	r0, {d0}
+	fstmead	r0, {d0}
+	fstmiad	r0!, {d0}
+	fstmead	r0!, {d0}
+	fstmdbd	r0!, {d0}
+	fstmfdd	r0!, {d0}
Index: gas/testsuite/gas/arm/vfp-fma-thumb.d
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-thumb.d	(revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-thumb.d	(revision 0)
@@ -0,0 +1,27 @@
+# name: FMA instructions, Thumb mode
+# as: -mfpu=vfpv4 -I$srcdir/$subdir
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> eea0 0a81 	vfma\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b02 	vfma\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04      	itt	eq
+0[0-9a-f]+ <[^>]+> eea0 0a81 	vfmaeq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b02 	vfmaeq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> eea0 0ac1 	vfms\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b42 	vfms\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04      	itt	eq
+0[0-9a-f]+ <[^>]+> eea0 0ac1 	vfmseq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b42 	vfmseq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee90 0ac1 	vfnma\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b42 	vfnma\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04      	itt	eq
+0[0-9a-f]+ <[^>]+> ee90 0ac1 	vfnmaeq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b42 	vfnmaeq\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee90 0a81 	vfnms\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b02 	vfnms\.f64	d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04      	itt	eq
+0[0-9a-f]+ <[^>]+> ee90 0a81 	vfnmseq\.f32	s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b02 	vfnmseq\.f64	d0, d1, d2
Index: gas/testsuite/gas/arm/vfp-fma-thumb.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-thumb.s	(revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-thumb.s	(revision 0)
@@ -0,0 +1,2 @@
+	.thumb
+	.include "vfp-fma-inc.s"
Index: gas/config/tc-arm.c
===================================================================
--- gas/config/tc-arm.c	(revision 267253)
+++ gas/config/tc-arm.c	(working copy)
@@ -216,13 +216,16 @@ static const arm_feature_set fpu_vfp_ext
   ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
 static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
 static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3xd = ARM_FEATURE (0, FPU_VFP_EXT_V3xD);
 static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
 static const arm_feature_set fpu_vfp_ext_d32 =
   ARM_FEATURE (0, FPU_VFP_EXT_D32);
 static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
 static const arm_feature_set fpu_vfp_v3_or_neon_ext =
   ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
-static const arm_feature_set fpu_neon_fp16 = ARM_FEATURE (0, FPU_NEON_FP16);
+static const arm_feature_set fpu_vfp_fp16 = ARM_FEATURE (0, FPU_VFP_EXT_FP16);
+static const arm_feature_set fpu_neon_ext_fma = ARM_FEATURE (0, FPU_NEON_EXT_FMA);
+static const arm_feature_set fpu_vfp_ext_fma = ARM_FEATURE (0, FPU_VFP_EXT_FMA);
 
 static int mfloat_abi_opt = -1;
 /* Record user cpu selection for object attributes.  */
@@ -11293,6 +11296,8 @@ struct neon_tab_entry
      vcge / vcgt with the operands reversed.  */  	\
   X(vclt,	0x0000300, 0x1200e00, 0x1b10200),	\
   X(vcle,	0x0000310, 0x1000e00, 0x1b10180),	\
+  X(vfma,	N_INV, 0x0000c10, N_INV),		\
+  X(vfms,	N_INV, 0x0200c10, N_INV),		\
   X(vmla,	0x0000900, 0x0000d10, 0x0800040),	\
   X(vmls,	0x1000900, 0x0200d10, 0x0800440),	\
   X(vmul,	0x0000910, 0x1000d10, 0x0800840),	\
@@ -11330,6 +11335,8 @@ struct neon_tab_entry
   X(vnmul,      0xe200a40, 0xe200b40, N_INV),		\
   X(vnmla,      0xe100a40, 0xe100b40, N_INV),		\
   X(vnmls,      0xe100a00, 0xe100b00, N_INV),		\
+  X(vfnma,      0xe900a40, 0xe900b40, N_INV),		\
+  X(vfnms,      0xe900a00, 0xe900b00, N_INV),		\
   X(vcmp,	0xeb40a40, 0xeb40b40, N_INV),		\
   X(vcmpz,	0xeb50a40, 0xeb50b40, N_INV),		\
   X(vcmpe,	0xeb40ac0, 0xeb40bc0, N_INV),		\
@@ -12149,6 +12156,27 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
 }
 
 static void
+do_vfp_nsyn_fma_fms (enum neon_shape rs)
+{
+  int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
+
+  if (rs == NS_FFF)
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmas");
+      else
+        do_vfp_nsyn_opcode ("ffnmas");
+    }
+  else
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmad");
+      else
+        do_vfp_nsyn_opcode ("ffnmad");
+    }
+}
+
+static void
 do_vfp_nsyn_mul (enum neon_shape rs)
 {
   if (rs == NS_FFF)
@@ -13116,6 +13144,18 @@ do_neon_mac_maybe_scalar (void)
 }
 
 static void
+do_neon_fmac (void)
+{
+  if (try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
+    return;
+
+  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+    return;
+
+  neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+}
+
+static void
 do_neon_tst (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
@@ -17179,6 +17219,19 @@ static const struct asm_opcode insns[] =
  cCE("fcmpes",	eb40ac0, 2, (RVS, RVS),	      vfp_sp_monadic),
  cCE("fcmpezs",	eb50ac0, 1, (RVS),	      vfp_sp_compare_z),
 
+ /* Double precision load/store are still present on single precision
+    implementations.  */
+ cCE("fldd",	d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fstd",	d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fldmiad",	c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmfdd",	c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmdbd",	d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fldmead",	d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmiad",	c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmead",	c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmdbd",	d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmfdd",	d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+
 #undef  ARM_VARIANT
 #define ARM_VARIANT  & fpu_vfp_ext_v1 /* VFP V1 (Double precision).  */
 
@@ -17197,18 +17250,6 @@ static const struct asm_opcode insns[] =
  cCE("ftouid",	ebc0b40, 2, (RVS, RVD),	      vfp_sp_dp_cvt),
  cCE("ftouizd",	ebc0bc0, 2, (RVS, RVD),	      vfp_sp_dp_cvt),
 
-  /* Memory operations.	 */
- cCE("fldd",	d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd",	d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fldmiad",	c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmfdd",	c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmdbd",	d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fldmead",	d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmiad",	c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmead",	c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmdbd",	d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmfdd",	d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
-
   /* Monadic operations.  */
  cCE("fabsd",	eb00bc0, 2, (RVD, RVD),	      vfp_dp_rd_rm),
  cCE("fnegd",	eb10b40, 2, (RVD, RVD),	      vfp_dp_rd_rm),
@@ -17535,29 +17576,52 @@ static const struct asm_opcode insns[] =
  nUF(vst4,      _vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
 
 #undef  THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_v3xd
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_v3xd
+ cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
+ cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+
+#undef THUMB_VARIANT
 #define THUMB_VARIANT  & fpu_vfp_ext_v3
 #undef  ARM_VARIANT
 #define ARM_VARIANT    & fpu_vfp_ext_v3
 
- cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
  cCE("fconstd",   eb00b00, 2, (RVD, I255),      vfp_dp_const),
- cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("fshtod",    eba0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("fsltod",    eba0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("fuhtod",    ebb0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("fultod",    ebb0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("ftoshd",    ebe0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("ftosld",    ebe0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("ftouhd",    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("ftould",    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
 
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_fma
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_fma
+ /* Mnemonics shared by Neon and VFP.  These are included in the
+    VFP FMA variant; NEON and VFP FMA always include the NEON
+    FMA instructions.  */
+ nCEF(vfma,     _vfma,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ nCEF(vfms,     _vfms,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ /* ffmas/ffnmas/ffmad/ffnmad are dummy mnemonics to satisfy gas;
+    the v form should always be used.  */
+ cCE("ffmas",	ea00a00, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffnmas",	ea00a40, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffmad",	ea00b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE("ffnmad",	ea00b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ nCE(vfnma,     _vfnma,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+ nCE(vfnms,     _vfnms,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+
 #undef THUMB_VARIANT
 #undef  ARM_VARIANT
 #define ARM_VARIANT  & arm_cext_xscale /* Intel XScale extensions.  */
@@ -21967,7 +22031,11 @@ static const struct arm_option_cpu_value
   {"vfpxd",		FPU_ARCH_VFP_V1xD},
   {"vfpv2",		FPU_ARCH_VFP_V2},
   {"vfpv3",		FPU_ARCH_VFP_V3},
+  {"vfpv3-fp16",	FPU_ARCH_VFP_V3_FP16},
   {"vfpv3-d16",		FPU_ARCH_VFP_V3D16},
+  {"vfpv3-d16-fp16",	FPU_ARCH_VFP_V3D16_FP16},
+  {"vfpv3xd",		FPU_ARCH_VFP_V3xD},
+  {"vfpv3xd-fp16",	FPU_ARCH_VFP_V3xD_FP16},
   {"arm1020t",		FPU_ARCH_VFP_V1},
   {"arm1020e",		FPU_ARCH_VFP_V2},
   {"arm1136jfs",	FPU_ARCH_VFP_V2},
@@ -21975,6 +22043,9 @@ static const struct arm_option_cpu_value
   {"maverick",		FPU_ARCH_MAVERICK},
   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
   {"neon-fp16",		FPU_ARCH_NEON_FP16},
+  {"vfpv4",		FPU_ARCH_VFP_V4},
+  {"vfpv4-d16",		FPU_ARCH_VFP_V4D16},
+  {"neon-vfpv4",	FPU_ARCH_NEON_VFP_V4},
   {NULL,		ARM_ARCH_NONE}
 };
 
@@ -22453,8 +22524,10 @@ aeabi_set_public_attributes (void)
 	}
       aeabi_set_attribute_string (Tag_CPU_name, p);
     }
+
   /* Tag_CPU_arch.  */
   aeabi_set_attribute_int (Tag_CPU_arch, arch);
+
   /* Tag_CPU_arch_profile.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a))
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'A');
@@ -22462,17 +22535,24 @@ aeabi_set_public_attributes (void)
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'R');
   else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_m))
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'M');
+
   /* Tag_ARM_ISA_use.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v1)
       || arch == 0)
     aeabi_set_attribute_int (Tag_ARM_ISA_use, 1);
+
   /* Tag_THUMB_ISA_use.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
       || arch == 0)
     aeabi_set_attribute_int (Tag_THUMB_ISA_use,
 	ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
+
   /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
+    aeabi_set_attribute_int (Tag_VFP_arch,
+			     ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
+			     ? 5 : 6);
+  else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
     aeabi_set_attribute_int (Tag_VFP_arch, 3);
   else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v3))
     aeabi_set_attribute_int (Tag_VFP_arch, 4);
@@ -22481,16 +22561,21 @@ aeabi_set_public_attributes (void)
   else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1)
            || ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1xd))
     aeabi_set_attribute_int (Tag_VFP_arch, 1);
+
   /* Tag_WMMX_arch.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt2))
     aeabi_set_attribute_int (Tag_WMMX_arch, 2);
   else if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt))
     aeabi_set_attribute_int (Tag_WMMX_arch, 1);
+
   /* Tag_Advanced_SIMD_arch (formerly Tag_NEON_arch).  */
   if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v1))
-    aeabi_set_attribute_int (Tag_Advanced_SIMD_arch, 1);
+    aeabi_set_attribute_int
+      (Tag_Advanced_SIMD_arch, (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_fma)
+				? 2 : 1));
+  
   /* Tag_VFP_HP_extension (formerly Tag_NEON_FP16_arch).  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_fp16))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_fp16))
     aeabi_set_attribute_int (Tag_VFP_HP_extension, 1);
 }
 
Index: include/opcode/arm.h
===================================================================
--- include/opcode/arm.h	(revision 267253)
+++ include/opcode/arm.h	(working copy)
@@ -62,10 +62,13 @@
 #define FPU_VFP_EXT_V1xD 0x08000000	/* Base VFP instruction set.  */
 #define FPU_VFP_EXT_V1	 0x04000000	/* Double-precision insns.    */
 #define FPU_VFP_EXT_V2	 0x02000000	/* ARM10E VFPr1.	      */
-#define FPU_VFP_EXT_V3	 0x01000000	/* VFPv3 insns.	              */
-#define FPU_NEON_EXT_V1	 0x00800000	/* Neon (SIMD) insns.	      */
-#define FPU_VFP_EXT_D32  0x00400000	/* Registers D16-D31.	      */
-#define FPU_NEON_FP16	 0x00200000	/* Half-precision extensions. */
+#define FPU_VFP_EXT_V3xD 0x01000000	/* VFPv3 single-precision.    */
+#define FPU_VFP_EXT_V3	 0x00800000	/* VFPv3 double-precision.    */
+#define FPU_NEON_EXT_V1	 0x00400000	/* Neon (SIMD) insns.	      */
+#define FPU_VFP_EXT_D32  0x00200000	/* Registers D16-D31.	      */
+#define FPU_VFP_EXT_FP16 0x00100000	/* Half-precision extensions. */
+#define FPU_NEON_EXT_FMA 0x00080000	/* Neon fused multiply-add    */
+#define FPU_VFP_EXT_FMA	 0x00040000	/* VFP fused multiply-add     */
 
 /* Architectures are the sum of the base and extensions.  The ARM ARM (rev E)
    defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T,
@@ -120,9 +123,13 @@
 #define FPU_VFP_V1xD	(FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE)
 #define FPU_VFP_V1	(FPU_VFP_V1xD | FPU_VFP_EXT_V1)
 #define FPU_VFP_V2	(FPU_VFP_V1 | FPU_VFP_EXT_V2)
-#define FPU_VFP_V3D16	(FPU_VFP_V2 | FPU_VFP_EXT_V3)
+#define FPU_VFP_V3D16	(FPU_VFP_V2 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_V3)
 #define FPU_VFP_V3	(FPU_VFP_V3D16 | FPU_VFP_EXT_D32)
+#define FPU_VFP_V3xD	(FPU_VFP_V1xD | FPU_VFP_EXT_V2 | FPU_VFP_EXT_V3xD)
+#define FPU_VFP_V4D16	(FPU_VFP_V3D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
+#define FPU_VFP_V4	(FPU_VFP_V3 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
 #define FPU_VFP_HARD	(FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \
+			 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_FMA | FPU_NEON_EXT_FMA \
                          | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_D32)
 #define FPU_FPA		(FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2)
 
@@ -136,13 +143,22 @@
 #define FPU_ARCH_VFP_V1	  ARM_FEATURE (0, FPU_VFP_V1)
 #define FPU_ARCH_VFP_V2	  ARM_FEATURE (0, FPU_VFP_V2)
 #define FPU_ARCH_VFP_V3D16	ARM_FEATURE (0, FPU_VFP_V3D16)
+#define FPU_ARCH_VFP_V3D16_FP16 \
+  ARM_FEATURE (0, FPU_VFP_V3D16 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_V3	  ARM_FEATURE (0, FPU_VFP_V3)
+#define FPU_ARCH_VFP_V3_FP16	ARM_FEATURE (0, FPU_VFP_V3 | FPU_VFP_EXT_FP16)
+#define FPU_ARCH_VFP_V3xD	ARM_FEATURE (0, FPU_VFP_V3xD)
+#define FPU_ARCH_VFP_V3xD_FP16	ARM_FEATURE (0, FPU_VFP_V3xD | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_NEON_V1  ARM_FEATURE (0, FPU_NEON_EXT_V1)
 #define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \
   ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1)
 #define FPU_ARCH_NEON_FP16 \
-  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_NEON_FP16)
+  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD)
+#define FPU_ARCH_VFP_V4 ARM_FEATURE(0, FPU_VFP_V4)
+#define FPU_ARCH_VFP_V4D16 ARM_FEATURE(0, FPU_VFP_V4D16)
+#define FPU_ARCH_NEON_VFP_V4 \
+  ARM_FEATURE(0, FPU_VFP_V4 | FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)
 
 #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE)
 
Index: binutils/readelf.c
===================================================================
--- binutils/readelf.c	(revision 267253)
+++ binutils/readelf.c	(working copy)
@@ -8967,7 +8967,7 @@ static const char * arm_attr_tag_ARM_ISA
 static const char * arm_attr_tag_THUMB_ISA_use[] =
   {"No", "Thumb-1", "Thumb-2"};
 static const char * arm_attr_tag_VFP_arch[] =
-  {"No", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16"};
+  {"No", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16", "VFPv4", "VFPv4-D16"};
 static const char * arm_attr_tag_WMMX_arch[] = {"No", "WMMXv1", "WMMXv2"};
 static const char * arm_attr_tag_Advanced_SIMD_arch[] = {"No", "NEONv1"};
 static const char * arm_attr_tag_PCS_config[] =
Index: bfd/elf32-arm.c
===================================================================
--- bfd/elf32-arm.c	(revision 267253)
+++ bfd/elf32-arm.c	(working copy)
@@ -9730,8 +9730,6 @@ elf32_arm_merge_eabi_attributes (bfd *ib
   /* Some tags have 0 = don't care, 1 = strong requirement,
      2 = weak requirement.  */
   static const int order_021[3] = {0, 2, 1};
-  /* For use with Tag_VFP_arch.  */
-  static const int order_01243[5] = {0, 1, 2, 4, 3};
   int i;
   bfd_boolean result = TRUE;
 
@@ -9923,12 +9921,50 @@ elf32_arm_merge_eabi_attributes (bfd *ib
 	    }
 	  break;
 	case Tag_VFP_arch:
-	  /* Use the "greatest" from the sequence 0, 1, 2, 4, 3, or the
-	     largest value if greater than 4 (for future-proofing).  */
-	  if ((in_attr[i].i > 4 && in_attr[i].i > out_attr[i].i)
-	      || (in_attr[i].i <= 4 && out_attr[i].i <= 4
-		  && order_01243[in_attr[i].i] > order_01243[out_attr[i].i]))
-	    out_attr[i].i = in_attr[i].i;
+	    {
+	      static const struct
+	      {
+		  int ver;
+		  int regs;
+	      } vfp_versions[7] =
+		{
+		  {0, 0},
+		  {1, 16},
+		  {2, 16},
+		  {3, 32},
+		  {3, 16},
+		  {4, 32},
+		  {4, 16}
+		};
+	      int ver;
+	      int regs;
+	      int newval;
+
+	      /* Values greater than 6 aren't defined, so just pick the
+	         biggest.  */
+	      if (in_attr[i].i > 6 && in_attr[i].i > out_attr[i].i)
+		{
+		  out_attr[i] = in_attr[i];
+		  break;
+		}
+	      /* The output uses the superset of input features
+		 (ISA version) and registers.  */
+	      ver = vfp_versions[in_attr[i].i].ver;
+	      if (ver < vfp_versions[out_attr[i].i].ver)
+		ver = vfp_versions[out_attr[i].i].ver;
+	      regs = vfp_versions[in_attr[i].i].regs;
+	      if (regs < vfp_versions[out_attr[i].i].regs)
+		regs = vfp_versions[out_attr[i].i].regs;
+	      /* This assumes all possible supersets are also valid
+	         options.  */
+	      for (newval = 6; newval > 0; newval--)
+		{
+		  if (regs == vfp_versions[newval].regs
+		      && ver == vfp_versions[newval].ver)
+		    break;
+		}
+	      out_attr[i].i = newval;
+	    }
 	  break;
 	case Tag_PCS_config:
 	  if (out_attr[i].i == 0)
Index: opcodes/arm-dis.c
===================================================================
--- opcodes/arm-dis.c	(revision 267253)
+++ opcodes/arm-dis.c	(working copy)
@@ -289,8 +289,8 @@ static const struct opcode32 coprocessor
   {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
   {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
   /* Half-precision conversion instructions.  */
-  {FPU_NEON_FP16,   0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
-  {FPU_NEON_FP16,   0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
 
   /* Floating point coprocessor (VFP) instructions.  */
   {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "vmsr%c\tfpsid, %12-15r"},
@@ -330,14 +330,14 @@ static const struct opcode32 coprocessor
   {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0f50, "vcvt%c.f64.%7?su32\t%z1, %y0"},
   {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "vcmp%7'e%c.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "vcmp%7'e%c.f64\t%z1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "vcvt%c.f64.%16?us%7?31%7?26\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f64\t%y1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f64\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "vmov%c\t%12-15r, %16-19r, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
+  {FPU_VFP_EXT_V3xD, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "vmov%c.f64\t%z1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "vmov%c\t%y4, %12-15r, %16-19r"},
   {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%z0, %12-15r, %16-19r"},
@@ -447,6 +447,16 @@ static const struct opcode32 coprocessor
   {ARM_CEXT_MAVERICK, 0x0e200600, 0x0ff00f10, "cfmadda32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
   {ARM_CEXT_MAVERICK, 0x0e300600, 0x0ff00f10, "cfmsuba32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
 
+  /* VFP Fused multiply add instructions.  */
+  {FPU_VFP_EXT_FMA, 0x0ea00a00, 0x0fb00f50, "vfma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b00, 0x0fb00f50, "vfma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00a40, 0x0fb00f50, "vfms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b40, 0x0fb00f50, "vfms%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a40, 0x0fb00f50, "vfnma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b40, 0x0fb00f50, "vfnma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a00, 0x0fb00f50, "vfnms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b00, 0x0fb00f50, "vfnms%c.f64\t%z1, %z2, %z0"},
+
   /* Generic coprocessor instructions.  */
   { 0, SENTINEL_GENERIC_START, 0, "" },
   {ARM_EXT_V5E, 0x0c400000, 0x0ff00000, "mcrr%c\t%8-11d, %4-7d, %12-15r, %16-19r, cr%0-3d"},
@@ -517,8 +527,12 @@ static const struct opcode32 neon_opcode
   {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx%c.8\t%12-15,22D, %F, %0-3,5D"},
   
   /* Half-precision conversions.  */
-  {FPU_NEON_FP16,   0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
-  {FPU_NEON_FP16,   0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+  {FPU_VFP_EXT_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
+  {FPU_VFP_EXT_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+
+  /* NEON fused multiply add instructions.  */
+  {FPU_NEON_EXT_FMA, 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_FMA, 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
 
   /* Two registers, miscellaneous.  */
   {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl%c.%24?us8\t%12-15,22Q, %0-3,5D"},

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]