New VFP architecture support

View: New views
1 Messages — Rating Filter:   Alert me  

New VFP architecture support

by Paul Brook :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

The attached patch adds support for two new VFP architecture variants: firstly
single precision VFPv3 (aka VFPv3xD), and secondly VFPv4, which adds fused
multiply-add instructions to VFPv3.

Tested on arm-none-eabi.
Applied to CVS head.

Paul

2009-10-29  Paul Brook  <paul@...>

        ld/testsuite/
        * ld-arm/arm-elf.exp: Add new attr-merge-vfp tests.
        * ld-arm/attr-merge-vfp-1.d: New test.
        * ld-arm/attr-merge-vfp-1r.d: New test.
        * ld-arm/attr-merge-vfp-2.d: New test.
        * ld-arm/attr-merge-vfp-2r.d: New test.
        * ld-arm/attr-merge-vfp-3.d: New test.
        * ld-arm/attr-merge-vfp-3r.d: New test.
        * ld-arm/attr-merge-vfp-4.d: New test.
        * ld-arm/attr-merge-vfp-4r.d: New test.
        * ld-arm/attr-merge-vfp-5.d: New test.
        * ld-arm/attr-merge-vfp-5r.d: New test.
        * ld-arm/attr-merge-vfp-2.s: New test.
        * ld-arm/attr-merge-vfp-3.s: New test.
        * ld-arm/attr-merge-vfp-3-d16.s: New test.
        * ld-arm/attr-merge-vfp-4.s: New test.
        * ld-arm/attr-merge-vfp-4-d16.s: New test.

        gas/
        * doc/c-arm.texi: Document new -mfpu options.
        * config/tc-arm.c (fpu_vfp_ext_v3xd, fpu_vfp_fp16, fpu_neon_ext_fma,
        fpu_vfp_ext_fma): New.
        (NEON_ENC_TAB): Add vfma, vfms, vfnma and vfnms.
        (do_vfp_nsyn_fma_fms, do_neon_fmac): New functions.
        (insns): Move double precision load/store.  Split out double
        precision VFPv3 instructions.  Add VFPv4 instructions.
        (arm_fpus): Add VFPv3-FP16, VFPv3xD and VFPv4 variants.
        (aeabi_set_public_attributes): Set VFPv4 variants.

        gas/testsuite/
        * gas/arm/attr-mfpu-vfpv4.d: New test.
        * gas/arm/attr-mfpu-vfpv4-d16.d: New test.
        * gas/arm/neon-fma-cov.d: New test.
        * gas/arm/neon-fma-cov.s: New test.
        * gas/arm/vfp-fma-inc.s: New test.
        * gas/arm/vfp-fma-arm.d: New test.
        * gas/arm/vfp-fma-arm.s: New test.
        * gas/arm/vfp-fma-thumb.d: New test.
        * gas/arm/vfp-fma-thumb.s: New test.
        * gas/arm/vfma1.d: New test.
        * gas/arm/vfma1.s: New test.
        * gas/arm/vfpv3xd.d: New test.
        * gas/arm/vfpv3xd.s: New test.

        include/opcode/
        * arm.h (FPU_VFP_EXT_V3xD, FPU_VFP_EXT_FP16, FPU_NEON_EXT_FMA,
        FPU_VFP_EXT_FMA, FPU_VFP_V3xD, FPU_VFP_V4D16, FPU_VFP_V4): Define.
        (FPU_ARCH_VFP_V3D16_FP16, FPU_ARCH_VFP_V3_FP16, FPU_ARCH_VFP_V3xD,
        FPU_ARCH_VFP_V3xD_FP16, FPU_ARCH_VFP_V4, FPU_ARCH_VFP_V4D16,
        FPU_ARCH_NEON_VFP_V4): Define.

        binutils/
        * readelf.c (arm_attr_tag_VFP_arch): Add VFPv4 and VFPv4-D16.

        bfd/
        * elf32-arm.c (elf32_arm_merge_eabi_attributes): Handle VFPv4
        attributes.

        opcodes/
        * arm-dis.c (coprocessor_opcodes): Update to use new feature flags.
        Add VFPv4 instructions.

Index: ld/testsuite/ld-arm/attr-merge-vfp-2r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2r.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2r.d (revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-3.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3
Index: ld/testsuite/ld-arm/attr-merge-vfp-3r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3r.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3r.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-4r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4r.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4r.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-3.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-1.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-1.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-1.d (revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-2.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3-D16
Index: ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3-d16.s (revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv3-d16
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-2.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2.d (revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3.s
+#source: attr-merge-vfp-3-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3
Index: ld/testsuite/ld-arm/attr-merge-vfp-2.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-2.s (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-2.s (revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv2
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-5r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-5r.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-5r.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4-d16.s
+#source: attr-merge-vfp-4.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-3.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-3.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-3.s (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-3.s (revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv3
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-4.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-3.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4-d16.s (revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv4-d16
+
Index: ld/testsuite/ld-arm/attr-merge-vfp-1r.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-1r.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-1r.d (revision 0)
@@ -0,0 +1,13 @@
+#source: attr-merge-vfp-3-d16.s
+#source: attr-merge-vfp-2.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv3-D16
Index: ld/testsuite/ld-arm/attr-merge-vfp-4.s
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-4.s (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-4.s (revision 0)
@@ -0,0 +1,2 @@
+.fpu vfpv4
+
Index: ld/testsuite/ld-arm/arm-elf.exp
===================================================================
--- ld/testsuite/ld-arm/arm-elf.exp (revision 267253)
+++ ld/testsuite/ld-arm/arm-elf.exp (working copy)
@@ -450,3 +450,13 @@ run_dump_test "unwind-2"
 run_dump_test "unwind-3"
 run_dump_test "unwind-4"
 run_dump_test "unwind-5"
+run_dump_test "attr-merge-vfp-1"
+run_dump_test "attr-merge-vfp-1r"
+run_dump_test "attr-merge-vfp-2"
+run_dump_test "attr-merge-vfp-2r"
+run_dump_test "attr-merge-vfp-3"
+run_dump_test "attr-merge-vfp-3r"
+run_dump_test "attr-merge-vfp-4"
+run_dump_test "attr-merge-vfp-4r"
+run_dump_test "attr-merge-vfp-5"
+run_dump_test "attr-merge-vfp-5r"
Index: ld/testsuite/ld-arm/attr-merge-vfp-5.d
===================================================================
--- ld/testsuite/ld-arm/attr-merge-vfp-5.d (revision 0)
+++ ld/testsuite/ld-arm/attr-merge-vfp-5.d (revision 0)
@@ -0,0 +1,14 @@
+#source: attr-merge-vfp-4.s
+#source: attr-merge-vfp-4-d16.s
+#as:
+#ld: -r
+#readelf: -A
+# This test is only valid on ELF based ports.
+# not-target: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: gas/doc/c-arm.texi
===================================================================
--- gas/doc/c-arm.texi (revision 267253)
+++ gas/doc/c-arm.texi (working copy)
@@ -200,15 +200,22 @@ The following format options are recogni
 @code{vfp10-r0},
 @code{vfp9},
 @code{vfpxd},
-@code{vfpv2}
-@code{vfpv3}
-@code{vfpv3-d16}
+@code{vfpv2},
+@code{vfpv3},
+@code{vfpv3-fp16},
+@code{vfpv3-d16},
+@code{vfpv3-d16-fp16},
+@code{vfpv3xd},
+@code{vfpv3xd-fp16},
+@code{vfpv4},
+@code{vfpv4-d16},
 @code{arm1020t},
 @code{arm1020e},
 @code{arm1136jf-s},
-@code{maverick}
+@code{maverick},
+@code{neon},
 and
-@code{neon}.
+@code{neon-vfpv4}.
 
 In addition to determining which instructions are assembled, this option
 also affects the way in which the @code{.double} assembler directive behaves
Index: gas/testsuite/gas/arm/attr-mfpu-vfpv4.d
===================================================================
--- gas/testsuite/gas/arm/attr-mfpu-vfpv4.d (revision 0)
+++ gas/testsuite/gas/arm/attr-mfpu-vfpv4.d (revision 0)
@@ -0,0 +1,13 @@
+# name: attributes for -mfpu=vfpv4
+# source: blank.s
+# as: -mfpu=vfpv4
+# readelf: -A
+# This test is only valid on EABI based ports.
+# target: *-*-*eabi
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4
+  Tag_VFP_HP_extension: Allowed
Index: gas/testsuite/gas/arm/neon-fma-cov.d
===================================================================
--- gas/testsuite/gas/arm/neon-fma-cov.d (revision 0)
+++ gas/testsuite/gas/arm/neon-fma-cov.d (revision 0)
@@ -0,0 +1,13 @@
+# name: Neon FMA instruction coverage
+# as: -mfpu=neon-vfpv4
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f2000c50 vfma\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000c50 vfma\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000c10 vfma\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200c50 vfms\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200c50 vfms\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200c10 vfms\.f32 d0, d0, d0
Index: gas/testsuite/gas/arm/neon-fma-cov.s
===================================================================
--- gas/testsuite/gas/arm/neon-fma-cov.s (revision 0)
+++ gas/testsuite/gas/arm/neon-fma-cov.s (revision 0)
@@ -0,0 +1,12 @@
+ .arm
+ .syntax unified
+ .text
+
+ .macro regs3_1 op opq vtype
+ \op\vtype q0,q0,q0
+ \opq\vtype q0,q0,q0
+ \op\vtype d0,d0,d0
+ .endm
+
+ regs3_1 vfma vfma .f32
+ regs3_1 vfms vfms .f32
Index: gas/testsuite/gas/arm/vfp-fma-arm.d
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-arm.d (revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-arm.d (revision 0)
@@ -0,0 +1,23 @@
+# name: FMA instructions, ARM mode
+# as: -mfpu=vfpv4 -I$srcdir/$subdir
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> eea00a81 vfma\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea10b02 vfma\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0ea00a81 vfmaeq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0ea10b02 vfmaeq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> eea00ac1 vfms\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea10b42 vfms\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0ea00ac1 vfmseq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0ea10b42 vfmseq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee900ac1 vfnma\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee910b42 vfnma\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0e900ac1 vfnmaeq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0e910b42 vfnmaeq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee900a81 vfnms\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee910b02 vfnms\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> 0e900a81 vfnmseq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> 0e910b02 vfnmseq\.f64 d0, d1, d2
Index: gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d
===================================================================
--- gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d (revision 0)
+++ gas/testsuite/gas/arm/attr-mfpu-vfpv4-d16.d (revision 0)
@@ -0,0 +1,13 @@
+# name: attributes for -mfpu=vfpv4-d16
+# source: blank.s
+# as: -mfpu=vfpv4-d16
+# readelf: -A
+# This test is only valid on EABI based ports.
+# target: *-*-*eabi
+
+Attribute Section: aeabi
+File Attributes
+  Tag_ARM_ISA_use: Yes
+  Tag_THUMB_ISA_use: Thumb-1
+  Tag_VFP_arch: VFPv4-D16
+  Tag_VFP_HP_extension: Allowed
Index: gas/testsuite/gas/arm/vfp-fma-inc.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-inc.s (revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-inc.s (revision 0)
@@ -0,0 +1,20 @@
+ .syntax unified
+
+ .include "itblock.s"
+
+func:
+ .macro dyadic op cond="" f32=".f32" f64=".f64"
+ itblock 2 \cond
+ \op\cond\f32 s0,s1,s2
+ \op\cond\f64 d0,d1,d2
+ .endm
+
+ .macro dyadic_c op
+ dyadic \op
+ dyadic \op eq
+ .endm
+
+ dyadic_c vfma
+ dyadic_c vfms
+ dyadic_c vfnma
+ dyadic_c vfnms
Index: gas/testsuite/gas/arm/vfp-fma-arm.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-arm.s (revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-arm.s (revision 0)
@@ -0,0 +1,2 @@
+ .arm
+ .include "vfp-fma-inc.s"
Index: gas/testsuite/gas/arm/vfma1.d
===================================================================
--- gas/testsuite/gas/arm/vfma1.d (revision 0)
+++ gas/testsuite/gas/arm/vfma1.d (revision 0)
@@ -0,0 +1,34 @@
+#objdump: -dr --prefix-addresses --show-raw-insn
+#name: VFMA decoding
+#as: -mcpu=arm7m
+
+# Test VFMA instruction disassembly
+
+.*: *file format .*arm.*
+
+
+Disassembly of section .text:
+00000000 <[^>]*> ee000a00 vmla.f32 s0, s0, s0
+00000004 <[^>]*> ee000b00 vmla.f64 d0, d0, d0
+00000008 <[^>]*> f2000d10 vmla.f32 d0, d0, d0
+0000000c <[^>]*> f2000d50 vmla.f32 q0, q0, q0
+00000010 <[^>]*> eea00a00 vfma.f32 s0, s0, s0
+00000014 <[^>]*> eea00b00 vfma.f64 d0, d0, d0
+00000018 <[^>]*> f2000c10 vfma.f32 d0, d0, d0
+0000001c <[^>]*> f2000c50 vfma.f32 q0, q0, q0
+00000020 <[^>]*> ee000a40 vmls.f32 s0, s0, s0
+00000024 <[^>]*> ee000b40 vmls.f64 d0, d0, d0
+00000028 <[^>]*> f2200d10 vmls.f32 d0, d0, d0
+0000002c <[^>]*> f2200d50 vmls.f32 q0, q0, q0
+00000030 <[^>]*> eea00a40 vfms.f32 s0, s0, s0
+00000034 <[^>]*> eea00b40 vfms.f64 d0, d0, d0
+00000038 <[^>]*> f2200c10 vfms.f32 d0, d0, d0
+0000003c <[^>]*> f2200c50 vfms.f32 q0, q0, q0
+00000040 <[^>]*> ee100a40 vnmla.f32 s0, s0, s0
+00000044 <[^>]*> ee100b40 vnmla.f64 d0, d0, d0
+00000048 <[^>]*> ee900a40 vfnma.f32 s0, s0, s0
+0000004c <[^>]*> ee900b40 vfnma.f64 d0, d0, d0
+00000050 <[^>]*> ee100a00 vnmls.f32 s0, s0, s0
+00000054 <[^>]*> ee100b00 vnmls.f64 d0, d0, d0
+00000058 <[^>]*> ee900a00 vfnms.f32 s0, s0, s0
+0000005c <[^>]*> ee900b00 vfnms.f64 d0, d0, d0
Index: gas/testsuite/gas/arm/vfma1.s
===================================================================
--- gas/testsuite/gas/arm/vfma1.s (revision 0)
+++ gas/testsuite/gas/arm/vfma1.s (revision 0)
@@ -0,0 +1,43 @@
+
+ .eabi_attribute Tag_Advanced_SIMD_arch, 2
+ .eabi_attribute Tag_VFP_arch, 6
+
+ @VMLA
+ .inst 0xee000a00 @ VFP  vmla.f32 s0,s0,s0
+ .inst 0xee000b00 @ VFP  vmla.f64 d0,d0,d0
+ .inst 0xf2000d10 @ NEON vmla.f32 d0,d0,d0
+ .inst 0xf2000d50 @ NEON vmla.f32 q0,q0,q0
+
+ @VFMA new
+ .inst 0xeea00a00 @ VFP  vfma.f32 s0,s0,s0
+ .inst 0xeea00b00 @ VFP  vfma.f64 d0,d0,d0
+ .inst 0xf2000c10 @ NEON vfma.f32 d0,d0,d0
+ .inst 0xf2000c50 @ NEON vfma.f32 q0,q0,q0
+
+ @VMLS
+ .inst 0xee000a40 @ VFP  vmls.F32 s0,s0,s0
+ .inst 0xee000b40 @ VFP  vmls.F64 d0,d0,d0
+ .inst 0xf2200d10 @ NEON vmls.F32 d0,d0,d0
+ .inst 0xf2200d50 @ NEON vmls.F32 q0,q0,q0
+
+ @VFMS new
+ .inst 0xeea00a40 @ VFP  vfms.F32 s0,s0,s0
+ .inst 0xeea00b40 @ VFP  vfms.F64 d0,d0,d0
+ .inst 0xf2200c10 @ NEON vfms.F32 d0,d0,d0
+ .inst 0xf2200c50 @ NEON vfms.F32 q0,q0,q0
+
+ @VNMLA
+ .inst 0xee100a40 @ VFP  vnmla.F32 s0,s0,s0
+ .inst 0xee100b40 @ VFP  vnmla.F64 d0,d0,d0
+
+ @VFNMA new
+ .inst 0xee900a40 @ VFP  vfnma.F32 s0,s0,s0
+ .inst 0xee900b40 @ VFP  vfnma.F64 d0,d0,d0
+
+ @VNMLS
+ .inst 0xee100a00 @ VFP  vnmls.F32 s0,s0,s0
+ .inst 0xee100b00 @ VFP  vnmls.F64 d0,d0,d0
+
+ @VFNMS new
+ .inst 0xee900a00 @ VFP  vfnms.F32 s0,s0,s0
+ .inst 0xee900b00 @ VFP  vfnms.F64 d0,d0,d0
Index: gas/testsuite/gas/arm/vfpv3xd.d
===================================================================
--- gas/testsuite/gas/arm/vfpv3xd.d (revision 0)
+++ gas/testsuite/gas/arm/vfpv3xd.d (revision 0)
@@ -0,0 +1,23 @@
+#objdump: -dr --prefix-addresses --show-raw-insn
+#name: VFP Double-precision load/store
+#as: -mfpu=vfpv3xd
+
+# Test the ARM VFP Double Precision load/store on single precision FPU
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0+[0-9a-f]* <[^>]*> ed900b00 vldr d0, \[r0\]
+0+[0-9a-f]* <[^>]*> ed800b00 vstr d0, \[r0\]
+0+[0-9a-f]* <[^>]*> ec900b02 vldmia r0, {d0}
+0+[0-9a-f]* <[^>]*> ec900b02 vldmia r0, {d0}
+0+[0-9a-f]* <[^>]*> ecb00b02 vldmia r0!, {d0}
+0+[0-9a-f]* <[^>]*> ecb00b02 vldmia r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed300b02 vldmdb r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed300b02 vldmdb r0!, {d0}
+0+[0-9a-f]* <[^>]*> ec800b02 vstmia r0, {d0}
+0+[0-9a-f]* <[^>]*> ec800b02 vstmia r0, {d0}
+0+[0-9a-f]* <[^>]*> eca00b02 vstmia r0!, {d0}
+0+[0-9a-f]* <[^>]*> eca00b02 vstmia r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed200b02 vstmdb r0!, {d0}
+0+[0-9a-f]* <[^>]*> ed200b02 vstmdb r0!, {d0}
Index: gas/testsuite/gas/arm/vfpv3xd.s
===================================================================
--- gas/testsuite/gas/arm/vfpv3xd.s (revision 0)
+++ gas/testsuite/gas/arm/vfpv3xd.s (revision 0)
@@ -0,0 +1,19 @@
+# Check double precision load/store are allowed on single precision
+# implementation
+
+ fldd d0, [r0]
+ fstd d0, [r0]
+
+ fldmiad r0, {d0}
+ fldmfdd r0, {d0}
+ fldmiad r0!, {d0}
+ fldmfdd r0!, {d0}
+ fldmdbd r0!, {d0}
+ fldmead r0!, {d0}
+
+ fstmiad r0, {d0}
+ fstmead r0, {d0}
+ fstmiad r0!, {d0}
+ fstmead r0!, {d0}
+ fstmdbd r0!, {d0}
+ fstmfdd r0!, {d0}
Index: gas/testsuite/gas/arm/vfp-fma-thumb.d
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-thumb.d (revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-thumb.d (revision 0)
@@ -0,0 +1,27 @@
+# name: FMA instructions, Thumb mode
+# as: -mfpu=vfpv4 -I$srcdir/$subdir
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> eea0 0a81 vfma\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b02 vfma\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04       itt eq
+0[0-9a-f]+ <[^>]+> eea0 0a81 vfmaeq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b02 vfmaeq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> eea0 0ac1 vfms\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b42 vfms\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04       itt eq
+0[0-9a-f]+ <[^>]+> eea0 0ac1 vfmseq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> eea1 0b42 vfmseq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee90 0ac1 vfnma\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b42 vfnma\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04       itt eq
+0[0-9a-f]+ <[^>]+> ee90 0ac1 vfnmaeq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b42 vfnmaeq\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> ee90 0a81 vfnms\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b02 vfnms\.f64 d0, d1, d2
+0[0-9a-f]+ <[^>]+> bf04       itt eq
+0[0-9a-f]+ <[^>]+> ee90 0a81 vfnmseq\.f32 s0, s1, s2
+0[0-9a-f]+ <[^>]+> ee91 0b02 vfnmseq\.f64 d0, d1, d2
Index: gas/testsuite/gas/arm/vfp-fma-thumb.s
===================================================================
--- gas/testsuite/gas/arm/vfp-fma-thumb.s (revision 0)
+++ gas/testsuite/gas/arm/vfp-fma-thumb.s (revision 0)
@@ -0,0 +1,2 @@
+ .thumb
+ .include "vfp-fma-inc.s"
Index: gas/config/tc-arm.c
===================================================================
--- gas/config/tc-arm.c (revision 267253)
+++ gas/config/tc-arm.c (working copy)
@@ -216,13 +216,16 @@ static const arm_feature_set fpu_vfp_ext
   ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
 static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
 static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3xd = ARM_FEATURE (0, FPU_VFP_EXT_V3xD);
 static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
 static const arm_feature_set fpu_vfp_ext_d32 =
   ARM_FEATURE (0, FPU_VFP_EXT_D32);
 static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
 static const arm_feature_set fpu_vfp_v3_or_neon_ext =
   ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
-static const arm_feature_set fpu_neon_fp16 = ARM_FEATURE (0, FPU_NEON_FP16);
+static const arm_feature_set fpu_vfp_fp16 = ARM_FEATURE (0, FPU_VFP_EXT_FP16);
+static const arm_feature_set fpu_neon_ext_fma = ARM_FEATURE (0, FPU_NEON_EXT_FMA);
+static const arm_feature_set fpu_vfp_ext_fma = ARM_FEATURE (0, FPU_VFP_EXT_FMA);
 
 static int mfloat_abi_opt = -1;
 /* Record user cpu selection for object attributes.  */
@@ -11293,6 +11296,8 @@ struct neon_tab_entry
      vcge / vcgt with the operands reversed.  */   \
   X(vclt, 0x0000300, 0x1200e00, 0x1b10200), \
   X(vcle, 0x0000310, 0x1000e00, 0x1b10180), \
+  X(vfma, N_INV, 0x0000c10, N_INV), \
+  X(vfms, N_INV, 0x0200c10, N_INV), \
   X(vmla, 0x0000900, 0x0000d10, 0x0800040), \
   X(vmls, 0x1000900, 0x0200d10, 0x0800440), \
   X(vmul, 0x0000910, 0x1000d10, 0x0800840), \
@@ -11330,6 +11335,8 @@ struct neon_tab_entry
   X(vnmul,      0xe200a40, 0xe200b40, N_INV), \
   X(vnmla,      0xe100a40, 0xe100b40, N_INV), \
   X(vnmls,      0xe100a00, 0xe100b00, N_INV), \
+  X(vfnma,      0xe900a40, 0xe900b40, N_INV), \
+  X(vfnms,      0xe900a00, 0xe900b00, N_INV), \
   X(vcmp, 0xeb40a40, 0xeb40b40, N_INV), \
   X(vcmpz, 0xeb50a40, 0xeb50b40, N_INV), \
   X(vcmpe, 0xeb40ac0, 0xeb40bc0, N_INV), \
@@ -12149,6 +12156,27 @@ do_vfp_nsyn_mla_mls (enum neon_shape rs)
 }
 
 static void
+do_vfp_nsyn_fma_fms (enum neon_shape rs)
+{
+  int is_fma = (inst.instruction & 0x0fffffff) == N_MNEM_vfma;
+
+  if (rs == NS_FFF)
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmas");
+      else
+        do_vfp_nsyn_opcode ("ffnmas");
+    }
+  else
+    {
+      if (is_fma)
+        do_vfp_nsyn_opcode ("ffmad");
+      else
+        do_vfp_nsyn_opcode ("ffnmad");
+    }
+}
+
+static void
 do_vfp_nsyn_mul (enum neon_shape rs)
 {
   if (rs == NS_FFF)
@@ -13116,6 +13144,18 @@ do_neon_mac_maybe_scalar (void)
 }
 
 static void
+do_neon_fmac (void)
+{
+  if (try_vfp_nsyn (3, do_vfp_nsyn_fma_fms) == SUCCESS)
+    return;
+
+  if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL)
+    return;
+
+  neon_dyadic_misc (NT_untyped, N_IF_32, 0);
+}
+
+static void
 do_neon_tst (void)
 {
   enum neon_shape rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
@@ -17179,6 +17219,19 @@ static const struct asm_opcode insns[] =
  cCE("fcmpes", eb40ac0, 2, (RVS, RVS),      vfp_sp_monadic),
  cCE("fcmpezs", eb50ac0, 1, (RVS),      vfp_sp_compare_z),
 
+ /* Double precision load/store are still present on single precision
+    implementations.  */
+ cCE("fldd", d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fstd", d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
+ cCE("fldmiad", c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmfdd", c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fldmdbd", d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fldmead", d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmiad", c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmead", c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
+ cCE("fstmdbd", d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+ cCE("fstmfdd", d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
+
 #undef  ARM_VARIANT
 #define ARM_VARIANT  & fpu_vfp_ext_v1 /* VFP V1 (Double precision).  */
 
@@ -17197,18 +17250,6 @@ static const struct asm_opcode insns[] =
  cCE("ftouid", ebc0b40, 2, (RVS, RVD),      vfp_sp_dp_cvt),
  cCE("ftouizd", ebc0bc0, 2, (RVS, RVD),      vfp_sp_dp_cvt),
 
-  /* Memory operations. */
- cCE("fldd", d100b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fstd", d000b00, 2, (RVD, ADDRGLDC),  vfp_dp_ldst),
- cCE("fldmiad", c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmfdd", c900b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fldmdbd", d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fldmead", d300b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmiad", c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmead", c800b00, 2, (RRw, VRDLST),    vfp_dp_ldstmia),
- cCE("fstmdbd", d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
- cCE("fstmfdd", d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
-
   /* Monadic operations.  */
  cCE("fabsd", eb00bc0, 2, (RVD, RVD),      vfp_dp_rd_rm),
  cCE("fnegd", eb10b40, 2, (RVD, RVD),      vfp_dp_rd_rm),
@@ -17535,29 +17576,52 @@ static const struct asm_opcode insns[] =
  nUF(vst4,      _vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
 
 #undef  THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_v3xd
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_v3xd
+ cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
+ cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+
+#undef THUMB_VARIANT
 #define THUMB_VARIANT  & fpu_vfp_ext_v3
 #undef  ARM_VARIANT
 #define ARM_VARIANT    & fpu_vfp_ext_v3
 
- cCE("fconsts",   eb00a00, 2, (RVS, I255),      vfp_sp_const),
  cCE("fconstd",   eb00b00, 2, (RVD, I255),      vfp_dp_const),
- cCE("fshtos",    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("fshtod",    eba0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fsltos",    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("fsltod",    eba0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("fuhtos",    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("fuhtod",    ebb0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("fultos",    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("fultod",    ebb0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftoshs",    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("ftoshd",    ebe0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftosls",    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("ftosld",    ebe0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
- cCE("ftouhs",    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
  cCE("ftouhd",    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
- cCE("ftouls",    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
  cCE("ftould",    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
 
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_fma
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_fma
+ /* Mnemonics shared by Neon and VFP.  These are included in the
+    VFP FMA variant; NEON and VFP FMA always includes the NEON
+    FMA instructions.  */
+ nCEF(vfma,     _vfma,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ nCEF(vfms,     _vfms,    3, (RNSDQ, oRNSDQ, RNSDQ), neon_fmac),
+ /* ffmas/ffnmas/ffmad/ffnmad are dummy mnemonics to satisfy gas;
+    the v form should always be used.  */
+ cCE("ffmas", ea00a00, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffnmas", ea00a40, 3, (RVS, RVS, RVS),  vfp_sp_dyadic),
+ cCE("ffmad", ea00b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE("ffnmad", ea00b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ nCE(vfnma,     _vfnma,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+ nCE(vfnms,     _vfnms,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
+
 #undef THUMB_VARIANT
 #undef  ARM_VARIANT
 #define ARM_VARIANT  & arm_cext_xscale /* Intel XScale extensions.  */
@@ -21967,7 +22031,11 @@ static const struct arm_option_cpu_value
   {"vfpxd", FPU_ARCH_VFP_V1xD},
   {"vfpv2", FPU_ARCH_VFP_V2},
   {"vfpv3", FPU_ARCH_VFP_V3},
+  {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
   {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
+  {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
+  {"vfpv3xd", FPU_ARCH_VFP_V3xD},
+  {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
   {"arm1020t", FPU_ARCH_VFP_V1},
   {"arm1020e", FPU_ARCH_VFP_V2},
   {"arm1136jfs", FPU_ARCH_VFP_V2},
@@ -21975,6 +22043,9 @@ static const struct arm_option_cpu_value
   {"maverick", FPU_ARCH_MAVERICK},
   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
   {"neon-fp16", FPU_ARCH_NEON_FP16},
+  {"vfpv4", FPU_ARCH_VFP_V4},
+  {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
+  {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
   {NULL, ARM_ARCH_NONE}
 };
 
@@ -22453,8 +22524,10 @@ aeabi_set_public_attributes (void)
  }
       aeabi_set_attribute_string (Tag_CPU_name, p);
     }
+
   /* Tag_CPU_arch.  */
   aeabi_set_attribute_int (Tag_CPU_arch, arch);
+
   /* Tag_CPU_arch_profile.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v7a))
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'A');
@@ -22462,17 +22535,24 @@ aeabi_set_public_attributes (void)
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'R');
   else if (ARM_CPU_HAS_FEATURE (flags, arm_ext_m))
     aeabi_set_attribute_int (Tag_CPU_arch_profile, 'M');
+
   /* Tag_ARM_ISA_use.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v1)
       || arch == 0)
     aeabi_set_attribute_int (Tag_ARM_ISA_use, 1);
+
   /* Tag_THUMB_ISA_use.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_ext_v4t)
       || arch == 0)
     aeabi_set_attribute_int (Tag_THUMB_ISA_use,
  ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
+
   /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
+    aeabi_set_attribute_int (Tag_VFP_arch,
+     ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
+     ? 5 : 6);
+  else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32))
     aeabi_set_attribute_int (Tag_VFP_arch, 3);
   else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v3))
     aeabi_set_attribute_int (Tag_VFP_arch, 4);
@@ -22481,16 +22561,21 @@ aeabi_set_public_attributes (void)
   else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1)
            || ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_v1xd))
     aeabi_set_attribute_int (Tag_VFP_arch, 1);
+
   /* Tag_WMMX_arch.  */
   if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt2))
     aeabi_set_attribute_int (Tag_WMMX_arch, 2);
   else if (ARM_CPU_HAS_FEATURE (flags, arm_cext_iwmmxt))
     aeabi_set_attribute_int (Tag_WMMX_arch, 1);
+
   /* Tag_Advanced_SIMD_arch (formerly Tag_NEON_arch).  */
   if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_v1))
-    aeabi_set_attribute_int (Tag_Advanced_SIMD_arch, 1);
+    aeabi_set_attribute_int
+      (Tag_Advanced_SIMD_arch, (ARM_CPU_HAS_FEATURE (flags, fpu_neon_ext_fma)
+ ? 2 : 1));
+  
   /* Tag_VFP_HP_extension (formerly Tag_NEON_FP16_arch).  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_neon_fp16))
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_fp16))
     aeabi_set_attribute_int (Tag_VFP_HP_extension, 1);
 }
 
Index: include/opcode/arm.h
===================================================================
--- include/opcode/arm.h (revision 267253)
+++ include/opcode/arm.h (working copy)
@@ -62,10 +62,13 @@
 #define FPU_VFP_EXT_V1xD 0x08000000 /* Base VFP instruction set.  */
 #define FPU_VFP_EXT_V1 0x04000000 /* Double-precision insns.    */
 #define FPU_VFP_EXT_V2 0x02000000 /* ARM10E VFPr1.      */
-#define FPU_VFP_EXT_V3 0x01000000 /* VFPv3 insns.              */
-#define FPU_NEON_EXT_V1 0x00800000 /* Neon (SIMD) insns.      */
-#define FPU_VFP_EXT_D32  0x00400000 /* Registers D16-D31.      */
-#define FPU_NEON_FP16 0x00200000 /* Half-precision extensions. */
+#define FPU_VFP_EXT_V3xD 0x01000000 /* VFPv3 single-precision.    */
+#define FPU_VFP_EXT_V3 0x00800000 /* VFPv3 double-precision.    */
+#define FPU_NEON_EXT_V1 0x00400000 /* Neon (SIMD) insns.      */
+#define FPU_VFP_EXT_D32  0x00200000 /* Registers D16-D31.      */
+#define FPU_VFP_EXT_FP16 0x00100000 /* Half-precision extensions. */
+#define FPU_NEON_EXT_FMA 0x00080000 /* Neon fused multiply-add    */
+#define FPU_VFP_EXT_FMA 0x00040000 /* VFP fused multiply-add     */
 
 /* Architectures are the sum of the base and extensions.  The ARM ARM (rev E)
    defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T,
@@ -120,9 +123,13 @@
 #define FPU_VFP_V1xD (FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE)
 #define FPU_VFP_V1 (FPU_VFP_V1xD | FPU_VFP_EXT_V1)
 #define FPU_VFP_V2 (FPU_VFP_V1 | FPU_VFP_EXT_V2)
-#define FPU_VFP_V3D16 (FPU_VFP_V2 | FPU_VFP_EXT_V3)
+#define FPU_VFP_V3D16 (FPU_VFP_V2 | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_V3)
 #define FPU_VFP_V3 (FPU_VFP_V3D16 | FPU_VFP_EXT_D32)
+#define FPU_VFP_V3xD (FPU_VFP_V1xD | FPU_VFP_EXT_V2 | FPU_VFP_EXT_V3xD)
+#define FPU_VFP_V4D16 (FPU_VFP_V3D16 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
+#define FPU_VFP_V4 (FPU_VFP_V3 | FPU_VFP_EXT_FP16 | FPU_VFP_EXT_FMA)
 #define FPU_VFP_HARD (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \
+ | FPU_VFP_EXT_V3xD | FPU_VFP_EXT_FMA | FPU_NEON_EXT_FMA \
                          | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_D32)
 #define FPU_FPA (FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2)
 
@@ -136,13 +143,22 @@
 #define FPU_ARCH_VFP_V1  ARM_FEATURE (0, FPU_VFP_V1)
 #define FPU_ARCH_VFP_V2  ARM_FEATURE (0, FPU_VFP_V2)
 #define FPU_ARCH_VFP_V3D16 ARM_FEATURE (0, FPU_VFP_V3D16)
+#define FPU_ARCH_VFP_V3D16_FP16 \
+  ARM_FEATURE (0, FPU_VFP_V3D16 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_V3  ARM_FEATURE (0, FPU_VFP_V3)
+#define FPU_ARCH_VFP_V3_FP16 ARM_FEATURE (0, FPU_VFP_V3 | FPU_VFP_EXT_FP16)
+#define FPU_ARCH_VFP_V3xD ARM_FEATURE (0, FPU_VFP_V3xD)
+#define FPU_ARCH_VFP_V3xD_FP16 ARM_FEATURE (0, FPU_VFP_V3xD | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_NEON_V1  ARM_FEATURE (0, FPU_NEON_EXT_V1)
 #define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \
   ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1)
 #define FPU_ARCH_NEON_FP16 \
-  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_NEON_FP16)
+  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1 | FPU_VFP_EXT_FP16)
 #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD)
+#define FPU_ARCH_VFP_V4 ARM_FEATURE(0, FPU_VFP_V4)
+#define FPU_ARCH_VFP_V4D16 ARM_FEATURE(0, FPU_VFP_V4D16)
+#define FPU_ARCH_NEON_VFP_V4 \
+  ARM_FEATURE(0, FPU_VFP_V4 | FPU_NEON_EXT_V1 | FPU_NEON_EXT_FMA)
 
 #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE)
 
Index: binutils/readelf.c
===================================================================
--- binutils/readelf.c (revision 267253)
+++ binutils/readelf.c (working copy)
@@ -8967,7 +8967,7 @@ static const char * arm_attr_tag_ARM_ISA
 static const char * arm_attr_tag_THUMB_ISA_use[] =
   {"No", "Thumb-1", "Thumb-2"};
 static const char * arm_attr_tag_VFP_arch[] =
-  {"No", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16"};
+  {"No", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16", "VFPv4", "VFPv4-D16"};
 static const char * arm_attr_tag_WMMX_arch[] = {"No", "WMMXv1", "WMMXv2"};
 static const char * arm_attr_tag_Advanced_SIMD_arch[] = {"No", "NEONv1"};
 static const char * arm_attr_tag_PCS_config[] =
Index: bfd/elf32-arm.c
===================================================================
--- bfd/elf32-arm.c (revision 267253)
+++ bfd/elf32-arm.c (working copy)
@@ -9730,8 +9730,6 @@ elf32_arm_merge_eabi_attributes (bfd *ib
   /* Some tags have 0 = don't care, 1 = strong requirement,
      2 = weak requirement.  */
   static const int order_021[3] = {0, 2, 1};
-  /* For use with Tag_VFP_arch.  */
-  static const int order_01243[5] = {0, 1, 2, 4, 3};
   int i;
   bfd_boolean result = TRUE;
 
@@ -9923,12 +9921,50 @@ elf32_arm_merge_eabi_attributes (bfd *ib
     }
   break;
  case Tag_VFP_arch:
-  /* Use the "greatest" from the sequence 0, 1, 2, 4, 3, or the
-     largest value if greater than 4 (for future-proofing).  */
-  if ((in_attr[i].i > 4 && in_attr[i].i > out_attr[i].i)
-      || (in_attr[i].i <= 4 && out_attr[i].i <= 4
-  && order_01243[in_attr[i].i] > order_01243[out_attr[i].i]))
-    out_attr[i].i = in_attr[i].i;
+    {
+      static const struct
+      {
+  int ver;
+  int regs;
+      } vfp_versions[7] =
+ {
+  {0, 0},
+  {1, 16},
+  {2, 16},
+  {3, 32},
+  {3, 16},
+  {4, 32},
+  {4, 16}
+ };
+      int ver;
+      int regs;
+      int newval;
+
+      /* Values greater than 6 aren't defined, so just pick the biggest.
+         NOTE(review): out_attr[i].i > 6 is unchecked before the lookups.  */
+      if (in_attr[i].i > 6 && in_attr[i].i > out_attr[i].i)
+ {
+  out_attr[i] = in_attr[i];
+  break;
+ }
+      /* The output uses the superset of input features
+ (ISA version) and registers.  */
+      ver = vfp_versions[in_attr[i].i].ver;
+      if (ver < vfp_versions[out_attr[i].i].ver)
+ ver = vfp_versions[out_attr[i].i].ver;
+      regs = vfp_versions[in_attr[i].i].regs;
+      if (regs < vfp_versions[out_attr[i].i].regs)
+ regs = vfp_versions[out_attr[i].i].regs;
+      /* This assumes all possible supersets are also valid
+         options.  */
+      for (newval = 6; newval > 0; newval--)
+ {
+  if (regs == vfp_versions[newval].regs
+      && ver == vfp_versions[newval].ver)
+    break;
+ }
+      out_attr[i].i = newval;
+    }
   break;
  case Tag_PCS_config:
   if (out_attr[i].i == 0)
Index: opcodes/arm-dis.c
===================================================================
--- opcodes/arm-dis.c (revision 267253)
+++ opcodes/arm-dis.c (working copy)
@@ -289,8 +289,8 @@ static const struct opcode32 coprocessor
   {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
   {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
   /* Half-precision conversion instructions.  */
-  {FPU_NEON_FP16,   0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
-  {FPU_NEON_FP16,   0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb20a40, 0x0fbf0f50, "vcvt%7?tb%c.f32.f16\t%y1, %y0"},
+  {FPU_VFP_EXT_FP16, 0x0eb30a40, 0x0fbf0f50, "vcvt%7?tb%c.f16.f32\t%y1, %y0"},
 
   /* Floating point coprocessor (VFP) instructions.  */
   {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "vmsr%c\tfpsid, %12-15r"},
@@ -330,14 +330,14 @@ static const struct opcode32 coprocessor
   {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0f50, "vcvt%c.f64.%7?su32\t%z1, %y0"},
   {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "vcmp%7'e%c.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "vcmp%7'e%c.f64\t%z1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0eba0a40, 0x0fbe0f50, "vcvt%c.f32.%16?us%7?31%7?26\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "vcvt%c.f64.%16?us%7?31%7?26\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f32\t%y1, %y0"},
   {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "vcvt%7`r%c.%16?su32.f64\t%y1, %z0"},
-  {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3xD, 0x0ebe0a40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f32\t%y1, %y1, #%5,0-3k"},
   {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "vcvt%c.%16?us%7?31%7?26.f64\t%z1, %z1, #%5,0-3k"},
   {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "vmov%c\t%12-15r, %16-19r, %z0"},
-  {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
+  {FPU_VFP_EXT_V3xD, 0x0eb00a00, 0x0fb00ff0, "vmov%c.f32\t%y1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "vmov%c.f64\t%z1, #%0-3,16-19d"},
   {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "vmov%c\t%y4, %12-15r, %16-19r"},
   {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%z0, %12-15r, %16-19r"},
@@ -447,6 +447,16 @@ static const struct opcode32 coprocessor
   {ARM_CEXT_MAVERICK, 0x0e200600, 0x0ff00f10, "cfmadda32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
   {ARM_CEXT_MAVERICK, 0x0e300600, 0x0ff00f10, "cfmsuba32%c\tmvax%5-7d, mvax%12-15d, mvfx%16-19d, mvfx%0-3d"},
 
+  /* VFP Fused multiply add instructions.  */
+  {FPU_VFP_EXT_FMA, 0x0ea00a00, 0x0fb00f50, "vfma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b00, 0x0fb00f50, "vfma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00a40, 0x0fb00f50, "vfms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0ea00b40, 0x0fb00f50, "vfms%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a40, 0x0fb00f50, "vfnma%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b40, 0x0fb00f50, "vfnma%c.f64\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_FMA, 0x0e900a00, 0x0fb00f50, "vfnms%c.f32\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_FMA, 0x0e900b00, 0x0fb00f50, "vfnms%c.f64\t%z1, %z2, %z0"},
+
   /* Generic coprocessor instructions.  */
   { 0, SENTINEL_GENERIC_START, 0, "" },
   {ARM_EXT_V5E, 0x0c400000, 0x0ff00000, "mcrr%c\t%8-11d, %4-7d, %12-15r, %16-19r, cr%0-3d"},
@@ -517,8 +527,12 @@ static const struct opcode32 neon_opcode
   {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx%c.8\t%12-15,22D, %F, %0-3,5D"},
   
   /* Half-precision conversions.  */
-  {FPU_NEON_FP16,   0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
-  {FPU_NEON_FP16,   0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+  {FPU_VFP_EXT_FP16, 0xf3b60600, 0xffbf0fd0, "vcvt%c.f16.f32\t%12-15,22D, %0-3,5Q"},
+  {FPU_VFP_EXT_FP16, 0xf3b60700, 0xffbf0fd0, "vcvt%c.f32.f16\t%12-15,22Q, %0-3,5D"},
+
+  /* NEON fused multiply add instructions.  */
+  {FPU_NEON_EXT_FMA, 0xf2000c10, 0xffa00f10, "vfma%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_FMA, 0xf2200c10, 0xffa00f10, "vfms%c.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
 
   /* Two registers, miscellaneous.  */
   {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl%c.%24?us8\t%12-15,22Q, %0-3,5D"},