When using NEON for single-precision FP, the NEON result must be placed in D0-D15...
author David Goodwin <david_goodwin@apple.com>
Wed, 5 Aug 2009 21:02:22 +0000 (21:02 +0000)
committer David Goodwin <david_goodwin@apple.com>
Wed, 5 Aug 2009 21:02:22 +0000 (21:02 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78244 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMInstrNEON.td
lib/Target/ARM/ARMRegisterInfo.td

index 3819b430b2b73622f57467f3875c4037570866fb..9c5f3aab9b8bc125aa1da2525c29d6edf3e2b57e 100644 (file)
@@ -610,23 +610,29 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (DestRC != SrcRC) {
-    // Not yet supported!
-    return false;
+    if (((DestRC == ARM::DPRRegisterClass) && (SrcRC == ARM::DPR_VFP2RegisterClass)) ||
+        ((SrcRC == ARM::DPRRegisterClass) && (DestRC == ARM::DPR_VFP2RegisterClass))) {
+      // Allow copy between DPR and DPR_VFP2.
+    } else {
+      return false;
+    }
   }
 
-  if (DestRC == ARM::GPRRegisterClass)
+  if (DestRC == ARM::GPRRegisterClass) {
     AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
                                         DestReg).addReg(SrcReg)));
-  else if (DestRC == ARM::SPRRegisterClass)
+  } else if (DestRC == ARM::SPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
                    .addReg(SrcReg));
-  else if (DestRC == ARM::DPRRegisterClass)
+  } else if ((DestRC == ARM::DPRRegisterClass) ||
+             (DestRC == ARM::DPR_VFP2RegisterClass)) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
                    .addReg(SrcReg));
-  else if (DestRC == ARM::QPRRegisterClass)
+  } else if (DestRC == ARM::QPRRegisterClass) {
     BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
-  else
+  } else {
     return false;
+  }
 
   return true;
 }
index 5cf81ee6af2568906ad2d07e7bd10dfe512b6692..d31ec416ea0f596fbfeef6747762ad6ce4dbb73e 100644 (file)
@@ -285,9 +285,11 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
 // Basic 2-register operations, scalar single-precision
 class N2VDInts<SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$a)),
-              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                     SPR:$a, arm_ssubreg_0)),
-              arm_ssubreg_0)>;
+              (EXTRACT_SUBREG (COPY_TO_REGCLASS 
+                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 
+                                        SPR:$a, arm_ssubreg_0)),
+                               DPR_VFP2),
+               arm_ssubreg_0)>;
 
 // Narrow 2-register intrinsics.
 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
@@ -329,11 +331,13 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 // Basic 3-register operations, scalar single-precision
 class N3VDs<SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
-              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$a, arm_ssubreg_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$b, arm_ssubreg_0)),
-              arm_ssubreg_0)>;
+              (EXTRACT_SUBREG (COPY_TO_REGCLASS
+                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+                                        SPR:$a, arm_ssubreg_0),
+                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+                                        SPR:$b, arm_ssubreg_0)),
+                               DPR_VFP2),
+               arm_ssubreg_0)>;
 
 // Basic 3-register intrinsics, both double- and quad-register.
 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -375,12 +379,14 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 class N3VDMulOps<SDNode MulNode, SDNode OpNode, NeonI Inst>
   : NEONFPPat<(f32 (OpNode SPR:$acc, 
                        (f32 (MulNode SPR:$a, SPR:$b)))),
-              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$acc, arm_ssubreg_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$a, arm_ssubreg_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$b, arm_ssubreg_0)),
+              (EXTRACT_SUBREG (COPY_TO_REGCLASS
+                  (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+                                        SPR:$acc, arm_ssubreg_0),
+                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+                                        SPR:$a, arm_ssubreg_0),
+                        (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+                                        SPR:$b, arm_ssubreg_0)),
+                               DPR_VFP2),
                arm_ssubreg_0)>;
 
 // Neon 3-argument intrinsics, both double- and quad-register.
index 77db711fb7b12ab1b733bbbbc757cb3500b67f83..651eae54d44241b4e6c78b2ccc9c6b3c511b0293 100644 (file)
@@ -305,6 +305,14 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
   }];
 }
 
+// Subset of DPR that are accessible with VFP2 (and so that also have
+// 32-bit SPR subregs).
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v2f32], 64,
+                             [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
+                              D8,  D9,  D10, D11, D12, D13, D14, D15]> {
+  let SubRegClassList = [SPR, SPR];
+}
+
 // Generic 128-bit vector register class.
 def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                         [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,