setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
if (!UseSoftFloat && Subtarget->hasAVX()) {
- addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
- addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
- addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
- def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
+ def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
}
// Move scalar to XMM zero-extended
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; FIXME: use avx versions for punpcklbw and punpckhbw
+; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
; CHECK: vextractf128 $0
; CHECK-NEXT: punpcklbw
ret <32 x i8> %shuffle
}
+; CHECK: vextractf128 $0
+; CHECK-NEXT: punpckhwd
+; CHECK-NEXT: vinsertf128 $0
+; CHECK-NEXT: vinsertf128 $1
+; CHECK-NEXT: vpermilps $85
+define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i16> %shuffle
+}
+