}
}
+/// PropagateOrdering - Carry the SDNode ordering of \p Old over to \p New.
+/// The ordering of \p New is overwritten with that of \p Old when \p New's
+/// ordering is 0 (presumably meaning none has been assigned yet) or when
+/// \p Old's ordering is strictly lower. A \p New node that already has a
+/// lower or equal non-zero ordering (e.g. a recycled node) is left untouched.
+void DAGTypeLegalizer::PropagateOrdering(SDNode *Old, SDNode *New) {
+  const unsigned SrcOrder = DAG.GetOrdering(Old);
+  const unsigned DstOrder = DAG.GetOrdering(New);
+
+  // Keep an existing ordering on New unless Old's is strictly lower.
+  if (DstOrder != 0 && SrcOrder >= DstOrder)
+    return;
+
+  // NOTE(review): when Old's ordering is 0 and New's is non-zero, this also
+  // writes 0 to New -- confirm that is intended.
+  DAG.AssignOrdering(New, SrcOrder);
+}
+
namespace {
/// NodeUpdateListener - This class is a DAGUpdateListener that listens for
/// updates to nodes and recomputes their ready state.
SDValue &OpEntry = PromotedIntegers[Op];
assert(OpEntry.getNode() == 0 && "Node is already promoted!");
OpEntry = Result;
+
+ // Propagate node ordering
+ PropagateOrdering(Op.getNode(), Result.getNode());
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
SDValue &OpEntry = SoftenedFloats[Op];
assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
OpEntry = Result;
+
+ // Propagate the SDNode ordering of Op's node to Result's node.
+ PropagateOrdering(Op.getNode(), Result.getNode());
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = ScalarizedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
OpEntry = Result;
+
+ // Propagate the SDNode ordering of Op's node to Result's node.
+ PropagateOrdering(Op.getNode(), Result.getNode());
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ PropagateOrdering(Op.getNode(), Lo.getNode());
+ PropagateOrdering(Op.getNode(), Hi.getNode());
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ PropagateOrdering(Op.getNode(), Lo.getNode());
+ PropagateOrdering(Op.getNode(), Hi.getNode());
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
assert(Entry.first.getNode() == 0 && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ PropagateOrdering(Op.getNode(), Lo.getNode());
+ PropagateOrdering(Op.getNode(), Hi.getNode());
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = WidenedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node already widened!");
OpEntry = Result;
+
+ // Propagate the SDNode ordering of Op's node to Result's node.
+ PropagateOrdering(Op.getNode(), Result.getNode());
}
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
return true;
}
void ExpungeNode(SDNode *N);
void PerformExpensiveChecks();
void RemapValue(SDValue &N);
+ void PropagateOrdering(SDNode *Old, SDNode *New);
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
- if (ResNode)
+ if (ResNode) {
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
ReplaceUses(Node, ResNode);
+ }
// If after the replacement this node is not used any more,
// remove this dead node.
--- /dev/null
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Ensure source-order scheduling is working: the four loads are expected to be
+; emitted back-to-back, immediately followed by the three adds.
+
+define void @foo(i32* %a) {
+; CHECK: .func foo
+; CHECK: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+ %ptr0 = getelementptr i32* %a, i32 0
+ %val0 = load i32* %ptr0
+ %ptr1 = getelementptr i32* %a, i32 1
+ %val1 = load i32* %ptr1
+ %ptr2 = getelementptr i32* %a, i32 2
+ %val2 = load i32* %ptr2
+ %ptr3 = getelementptr i32* %a, i32 3
+ %val3 = load i32* %ptr3
+
+ %t0 = add i32 %val0, %val1
+ %t1 = add i32 %t0, %val2
+ %t2 = add i32 %t1, %val3
+
+ store i32 %t2, i32* %a
+
+ ret void
+}
+
--- /dev/null
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Four <2 x i32> loads feeding a chain of three vector adds. The four
+; ld.v2.u32 are expected back-to-back, immediately followed by six add.s32.
+define void @foo(<2 x i32>* %a) {
+; CHECK: .func foo
+; CHECK: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+ %ptr0 = getelementptr <2 x i32>* %a, i32 0
+ %val0 = load <2 x i32>* %ptr0
+ %ptr1 = getelementptr <2 x i32>* %a, i32 1
+ %val1 = load <2 x i32>* %ptr1
+ %ptr2 = getelementptr <2 x i32>* %a, i32 2
+ %val2 = load <2 x i32>* %ptr2
+ %ptr3 = getelementptr <2 x i32>* %a, i32 3
+ %val3 = load <2 x i32>* %ptr3
+
+ %t0 = add <2 x i32> %val0, %val1
+ %t1 = add <2 x i32> %t0, %val2
+ %t2 = add <2 x i32> %t1, %val3
+
+ store <2 x i32> %t2, <2 x i32>* %a
+
+ ret void
+}
+
define void @foo(<2 x float>* %a) {
; CHECK: .func foo
-; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
%t1 = load <2 x float>* %a
%t2 = fmul <2 x float> %t1, %t1
store <2 x float> %t2, <2 x float>* %a
define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
-; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
%t1 = load <4 x float>* %a
%t2 = fmul <4 x float> %t1, %t1
store <4 x float> %t2, <4 x float>* %a
define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
-; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}];
-; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
%t1 = load <8 x float>* %a
%t2 = fmul <8 x float> %t1, %t1
store <8 x float> %t2, <8 x float>* %a
define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
-; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
%t1 = load <2 x i32>* %a
%t2 = mul <2 x i32> %t1, %t1
store <2 x i32> %t2, <2 x i32>* %a
define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
-; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
%t1 = load <4 x i32>* %a
%t2 = mul <4 x i32> %t1, %t1
store <4 x i32> %t2, <4 x i32>* %a
define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
-; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}];
-; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16];
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
%t1 = load <8 x i32>* %a
%t2 = mul <8 x i32> %t1, %t1
store <8 x i32> %t2, <8 x i32>* %a
%1 = fdiv double %x, %y
%2 = fsub double %x, %z
- %3 = fptoui double %1 to i64
- %4 = fptoui double %2 to i64
- %5 = sub i64 %3, %4
+ %3 = fptoui double %2 to i64
+ %4 = fptoui double %1 to i64
+ %5 = sub i64 %4, %3
ret i64 %5
}
; FTOL_2: calll __ftol2
;; stack is %x
- %1 = fptoui double %x to i64
- %2 = fptoui double %y to i64
- %3 = sub i64 %1, %2
+ %1 = fptoui double %y to i64
+ %2 = fptoui double %x to i64
+ %3 = sub i64 %2, %1
%4 = insertvalue {double, i64} undef, double %x, 0
%5 = insertvalue {double, i64} %4, i64 %3, 1
ret {double, i64} %5