(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
(FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+// When converting to f16 going directly to a store, make sure we use the
+// appropriate direct conversion instructions and store via the FPR16
+// registers rather than going through the GPRs.
+let AddedComplexity = 10 in {
+// f32->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+// f64->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+}
+
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
ret double %conv
}
+define void @store0(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store0:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ store i16 %tmp, i16* %a, align 2
+ ret void
+}
+
+define void @store1(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store1:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ store i16 %tmp, i16* %a, align 2
+ ret void
+}
+
+define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
+; CHECK-LABEL: store2:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
+; CHECK-LABEL: store3:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
+; CHECK-LABEL: store4:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
+; CHECK-LABEL: store5:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store6(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store6:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store7(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store7:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: str h0, [x0, #20]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store8(i16* nocapture %a, float %val) nounwind {
+; CHECK-LABEL: store8:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+define void @store9(i16* nocapture %a, double %val) nounwind {
+; CHECK-LABEL: store9:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: stur h0, [x0, #-20]
+; CHECK-NEXT: ret
+
+ %conv = fptrunc double %val to float
+ %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ store i16 %tmp, i16* %arrayidx, align 2
+ ret void
+}
+
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
declare float @llvm.convert.from.fp16(i16) nounwind readnone