if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
return false;
- // All clear, widen the COPY. Preserve the implicit operands, even if they
- // may be superfluous now.
+ // A dead copy shouldn't show up here, but reject it just in case.
+ if (MI->getOperand(0).isDead())
+ return false;
+
+ // All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
+
+ // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
+ // or some other super-register.
+ int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
+ if (ImpDefIdx != -1)
+ MI->RemoveOperand(ImpDefIdx);
+
+ // Change the opcode and operands.
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
AddDefaultPred(MachineInstrBuilder(MI));
+
+ // We are now reading SrcRegD instead of SrcRegS. This may upset the
+ // register scavenger and machine verifier, so we need to indicate that we
+ // are reading an undefined value from SrcRegD, but a proper value from
+ // SrcRegS.
+ MI->getOperand(1).setIsUndef();
+ MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+
+ // SrcRegD may actually contain an unrelated value in the ssub_1
+ // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
+ if (MI->getOperand(1).isKill()) {
+ MI->getOperand(1).setIsKill(false);
+ MI->addRegisterKilled(SrcRegS, TRI, true);
+ }
+
DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
// Add the extra source operand and new predicates.
// This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addReg(MI->getOperand(1).getReg()));
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
--- /dev/null
+; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+; The 0.0 constant is loaded from the constant pool and kept in a register.
+; CHECK: %entry
+; CHECK: vldr.32 s
+; The float loop variable is initialized with a vmovs from the constant register.
+; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32.
+; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]]
+; CHECK: , [[DN]]
+; CHECK: %for.body.i
+; CHECK: vadd.f32 [[DL]], [[DL]], [[DN]]
+;
+; This test verifies that:
+; - The VMOVS widening happens.
+; - Register liveness is valid (checked by -verify-machineinstrs).
+; - The execution domain switch to vorr works across basic blocks.
+
+; Test function: an inner loop that accumulates a float, nested in an outer
+; loop. No block contains a return; %rInnerproduct.exit branches back to
+; %for.body4.
+define void @Mm() nounwind {
+entry:
+ br label %for.body4
+
+; Outer loop header; reached from %entry and again from %rInnerproduct.exit.
+for.body4:
+ br label %for.body.i
+
+; Inner loop. %tmp3.i is 0.0 when entered from %for.body4 and takes the
+; previous %add.i on the back edge. The exit condition compares an undef
+; value against 41.
+for.body.i:
+ %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
+ %add.i = fadd float %tmp3.i, 0.000000e+00
+ %exitcond.i = icmp eq i32 undef, 41
+ br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
+
+; Inner loop exit: stores the accumulated value through an undef pointer and
+; restarts the outer loop.
+rInnerproduct.exit:
+ store float %add.i, float* undef, align 4
+ br label %for.body4
+}