From b69d556c370b32dee9f64d8250e51aad33963cc2 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic
Date: Wed, 4 Mar 2015 20:44:33 +0000
Subject: [PATCH] Add LLVM support for PPC cryptography builtins

Review: http://reviews.llvm.org/D7955

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231285 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsPowerPC.td          |  38 ++-
 lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp |   1 +
 .../PowerPC/InstPrinter/PPCInstPrinter.cpp    |   7 +
 .../PowerPC/InstPrinter/PPCInstPrinter.h      |   1 +
 lib/Target/PowerPC/PPC.td                     |   5 +-
 lib/Target/PowerPC/PPCInstrAltivec.td         |  40 +++
 lib/Target/PowerPC/PPCInstrFormats.td         |  33 +++
 lib/Target/PowerPC/PPCInstrInfo.td            |   9 +
 lib/Target/PowerPC/PPCSubtarget.cpp           |   1 +
 lib/Target/PowerPC/PPCSubtarget.h             |   2 +
 lib/Target/PowerPC/README_ALTIVEC.txt         |  45 +++
 test/CodeGen/PowerPC/crypto_bifs.ll           | 275 ++++++++++++++++++
 .../PowerPC/ppc64-encoding-vmx.txt            |  36 +++
 test/MC/PowerPC/ppc64-encoding-vmx.s          |  49 ++++
 14 files changed, 539 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/PowerPC/crypto_bifs.ll

diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 36fb1e94ae2..bb03e053a05 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -73,7 +73,7 @@ class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
                           [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
                           [IntrNoMem]>;
 
-/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16f8
+/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
 /// vectors and returns one. These intrinsics have no side effects.
 class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
   : PowerPC_Vec_Intrinsic<GCCIntSuffix,
@@ -352,7 +352,6 @@
                           [llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                           [IntrNoMem]>;
 
-
 //===----------------------------------------------------------------------===//
 // PowerPC Altivec Intrinsic Definitions.
 
@@ -559,6 +558,41 @@ def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
 def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
 def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
 
+// Power8 Intrinsics
+// Crypto
+let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
+ def int_ppc_altivec_crypto_vsbox : + GCCBuiltin<"__builtin_altivec_crypto_vsbox">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_crypto_vpermxor : + GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +// These need diagnostics for invalid arguments so don't inherit from GCCBuiltin +def int_ppc_altivec_crypto_vshasigmad : + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_ppc_altivec_crypto_vshasigmaw : + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} +def int_ppc_altivec_crypto_vcipher : + PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">; +def int_ppc_altivec_crypto_vcipherlast : + PowerPC_Vec_DDD_Intrinsic<"crypto_vcipherlast">; +def int_ppc_altivec_crypto_vncipher : + PowerPC_Vec_DDD_Intrinsic<"crypto_vncipher">; +def int_ppc_altivec_crypto_vncipherlast : + PowerPC_Vec_DDD_Intrinsic<"crypto_vncipherlast">; +def int_ppc_altivec_crypto_vpmsumb : + PowerPC_Vec_BBB_Intrinsic<"crypto_vpmsumb">; +def int_ppc_altivec_crypto_vpmsumh : + PowerPC_Vec_HHH_Intrinsic<"crypto_vpmsumh">; +def int_ppc_altivec_crypto_vpmsumw : + PowerPC_Vec_WWW_Intrinsic<"crypto_vpmsumw">; +def int_ppc_altivec_crypto_vpmsumd : + PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">; //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index bf00e7397be..c164f32fbb6 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -425,6 +425,7 @@ public: bool isToken() const override { return Kind == Token; } bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index c287fbe7c5b..5d1aa1ad540 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 1 && "Invalid u1imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 6ead19b33fe..22934b8f4fb 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -43,6 +43,7 @@ public: void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, 
                          unsigned OpNo, raw_ostream &O);
   void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 740488b4ff5..dc2a0d5f422 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -112,6 +112,9 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
 def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
                                         "Enable POWER8 Altivec instructions",
                                         [FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+                                       "Enable POWER8 Crypto instructions",
+                                       [FeatureP8Altivec]>;
 def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
@@ -258,7 +261,7 @@ def ProcessorFeatures {
                      FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
                      FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
                      FeatureFPRND, FeatureFPCVT, FeatureISEL,
-                     FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+                     FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
                      Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
                      DeprecatedMFTB, DeprecatedDST];
 }
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 4e784a59350..2de852b4843 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
              !strconcat(opc, " $vD, $vB"), IIC_VecFP,
             [(set OutTy:$vD, (IntID InTy:$vB))]>;
 
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+  : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+              !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+              [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+  : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+              !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+              [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Definitions.
 
@@ -939,6 +949,7 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
 } // end HasAltivec
 
 def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
 
 let Predicates = [HasP8Altivec] in {
 let isCommutable = 1 in {
@@ -1035,4 +1046,33 @@ def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
 def VCMPGTUD  : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
 def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
 
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB  : VX1_Int_Ty<1032, "vpmsumb",
+                          int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH  : VX1_Int_Ty<1096, "vpmsumh",
+                          int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW  : VX1_Int_Ty<1160, "vpmsumw",
+                          int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD  : VX1_Int_Ty<1224, "vpmsumd",
+                          int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+                           int_ppc_altivec_crypto_vpermxor, v16i8>;
+
 } // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+                             int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+                             int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+                         v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+                             int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+                          int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+                              int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 506a2d0c7ae..2d3e43c6f20 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1470,6 +1470,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
   : I<4, OOL, IOL, asmstr, itin> {
   let Inst{21-31} = xo;
 }
 
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VD;
+  bits<5> VA;
+  bits<1> ST;
+  bits<4> SIX;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VD;
+  let Inst{11-15} = VA;
+  let Inst{16} = ST;
+  let Inst{17-20} = SIX;
+  let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox"
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VD;
+  bits<5> VA;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VD;
+  let Inst{11-15} = VA;
+  let Inst{16-20} = 0;
+  let Inst{21-31} = xo;
+}
+
 // E-4 VXR-Form
 class VXRForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1a045b1393d..fe436caf5f5 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -446,6 +446,15 @@ def crrc : RegisterOperand<CRRC> {
   let ParserMatchClass = PPCRegCRRCAsmOperand;
 }
 
+def PPCU1ImmAsmOperand : AsmOperandClass {
+  let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+  let PrintMethod = "printU1ImmOperand";
+  let ParserMatchClass = PPCU1ImmAsmOperand;
+}
+
 def PPCU2ImmAsmOperand : AsmOperandClass {
   let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
   let RenderMethod = "addImmOperands";
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c91428db3a9..02b721b433b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasVSX = false;
   HasP8Vector = false;
   HasP8Altivec = false;
+  HasP8Crypto = false;
   HasFCPSGN = false;
   HasFSQRT = false;
   HasFRE =
false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 247a96d405e..a74b5fb4a86 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -90,6 +90,7 @@ protected: bool HasVSX; bool HasP8Vector; bool HasP8Altivec; + bool HasP8Crypto; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -218,6 +219,7 @@ public: bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } + bool hasP8Crypto() const { return HasP8Crypto; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index cb3ad5bd3a1..34765de1b21 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -211,6 +211,51 @@ vector float f(vector float a, vector float b) { //===----------------------------------------------------------------------===// +We should do a little better with eliminating dead stores. +The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# BB#0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, .LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. +//===----------------------------------------------------------------------===// + The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { diff --git a/test/CodeGen/PowerPC/crypto_bifs.ll b/test/CodeGen/PowerPC/crypto_bifs.ll new file mode 100644 index 00000000000..f58935b85b6 --- /dev/null +++ b/test/CodeGen/PowerPC/crypto_bifs.ll @@ -0,0 +1,275 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s +; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; FIXME: The original intent was to add a check-next for the blr after every check. +; However, this currently fails since we don't eliminate stores of the unused +; locals. 
These stores are sometimes scheduled after the crypto instruction + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { +entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + %0 = load <16 x i8>, <16 x i8>* %a, align 16 + %1 = load <16 x i8>, <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +; CHECK: vpmsumb 2, +} + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + +; Function Attrs: nounwind +define <8 x i16> @test_vpmsumh() #0 { +entry: + %a = alloca <8 x i16>, align 16 + %b = alloca <8 x i16>, align 16 + store <8 x i16> , <8 x i16>* %a, align 16 + store <8 x i16> , <8 x i16>* %b, align 16 + %0 = load <8 x i16>, <8 x i16>* %a, align 16 + %1 = load <8 x i16>, <8 x i16>* %b, align 16 + %2 = call <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16> %0, <8 x i16> %1) + ret <8 x i16> %2 +; CHECK: vpmsumh 2, +} + +; Function Attrs: nounwind readnone +declare <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16>, <8 x i16>) #1 + +; Function Attrs: nounwind +define <4 x i32> @test_vpmsumw() #0 { +entry: + %a = alloca <4 x i32>, align 16 + %b = alloca <4 x i32>, align 16 + store <4 x i32> , <4 x i32>* %a, align 16 + store <4 x i32> , <4 x i32>* %b, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 + %1 = load <4 x i32>, <4 x i32>* %b, align 16 + %2 = call <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %2 +; CHECK: vpmsumw 2, +} + +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32>, <4 x i32>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vpmsumd() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vpmsumd 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vsbox() #0 { +entry: + %a = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64> %0) + ret <2 x i64> %1 +; CHECK: vsbox 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64>) #1 + +; Function Attrs: nounwind +define <16 x i8> @test_vpermxorb() #0 { +entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + %c = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + store <16 x i8> , <16 x i8>* %c, align 16 + %0 = load <16 x i8>, <16 x i8>* %a, align 16 + %1 = load <16 x i8>, <16 x i8>* %b, align 16 + %2 = load <16 x i8>, <16 x i8>* %c, align 16 + %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + ret <16 x i8> %3 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>) #1 + +; Function Attrs: nounwind +define <8 x i16> @test_vpermxorh() #0 { 
+entry: + %a = alloca <8 x i16>, align 16 + %b = alloca <8 x i16>, align 16 + %c = alloca <8 x i16>, align 16 + store <8 x i16> , <8 x i16>* %a, align 16 + store <8 x i16> , <8 x i16>* %b, align 16 + store <8 x i16> , <8 x i16>* %c, align 16 + %0 = load <8 x i16>, <8 x i16>* %a, align 16 + %1 = bitcast <8 x i16> %0 to <16 x i8> + %2 = load <8 x i16>, <8 x i16>* %b, align 16 + %3 = bitcast <8 x i16> %2 to <16 x i8> + %4 = load <8 x i16>, <8 x i16>* %c, align 16 + %5 = bitcast <8 x i16> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + ret <8 x i16> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <4 x i32> @test_vpermxorw() #0 { +entry: + %a = alloca <4 x i32>, align 16 + %b = alloca <4 x i32>, align 16 + %c = alloca <4 x i32>, align 16 + store <4 x i32> , <4 x i32>* %a, align 16 + store <4 x i32> , <4 x i32>* %b, align 16 + store <4 x i32> , <4 x i32>* %c, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 + %1 = bitcast <4 x i32> %0 to <16 x i8> + %2 = load <4 x i32>, <4 x i32>* %b, align 16 + %3 = bitcast <4 x i32> %2 to <16 x i8> + %4 = load <4 x i32>, <4 x i32>* %c, align 16 + %5 = bitcast <4 x i32> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + ret <4 x i32> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <2 x i64> @test_vpermxord() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + %c = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + store <2 x i64> , <2 x i64>* %c, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = bitcast <2 x i64> %0 to <16 x i8> + %2 = load <2 x i64>, <2 x i64>* %b, align 16 + %3 = bitcast <2 x i64> %2 to <16 x i8> + %4 = load <2 x i64>, <2 x i64>* %c, align 16 + %5 = bitcast <2 x i64> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + ret <2 x i64> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <2 x i64> @test_vcipher() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vcipher 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vcipherlast() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vcipherlast 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vncipher() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + 
store <2 x i64> , <2 x i64>* %b, align 16
+  %0 = load <2 x i64>, <2 x i64>* %a, align 16
+  %1 = load <2 x i64>, <2 x i64>* %b, align 16
+  %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64> %0, <2 x i64> %1)
+  ret <2 x i64> %2
+; CHECK: vncipher 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vncipherlast() #0 {
+entry:
+  %a = alloca <2 x i64>, align 16
+  %b = alloca <2 x i64>, align 16
+  store <2 x i64> , <2 x i64>* %a, align 16
+  store <2 x i64> , <2 x i64>* %b, align 16
+  %0 = load <2 x i64>, <2 x i64>* %a, align 16
+  %1 = load <2 x i64>, <2 x i64>* %b, align 16
+  %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64> %0, <2 x i64> %1)
+  ret <2 x i64> %2
+; CHECK: vncipherlast 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <4 x i32> @test_vshasigmaw() #0 {
+entry:
+  %a = alloca <4 x i32>, align 16
+  store <4 x i32> , <4 x i32>* %a, align 16
+  %0 = load <4 x i32>, <4 x i32>* %a, align 16
+  %1 = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %0, i32 1, i32 15)
+  ret <4 x i32> %1
+; CHECK: vshasigmaw 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vshasigmad() #0 {
+entry:
+  %a = alloca <2 x i64>, align 16
+  store <2 x i64> , <2 x i64>* %a, align 16
+  %0 = load <2 x i64>, <2 x i64>* %a, align 16
+  %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %0, i32 1, i32 15)
+  ret <2 x i64> %1
+; CHECK: vshasigmad 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 230949) (llvm/trunk 230946)"}
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
index 0e56f8ed361..4424d69454c 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -120,6 +120,42 @@
 # CHECK: vperm 2, 3, 4, 5
 0x10 0x43 0x21 0x6b
 
+# CHECK: vpermxor 2, 3, 4, 5
+0x10 0x43 0x21 0x6d
+
+# CHECK: vsbox 2, 5
+0x10 0x45 0x05 0xc8
+
+# CHECK: vcipher 2, 5, 17
+0x10 0x45 0x8d 0x08
+
+# CHECK: vcipherlast 2, 5, 17
+0x10 0x45 0x8d 0x09
+
+# CHECK: vncipher 2, 5, 17
+0x10 0x45 0x8d 0x48
+
+# CHECK: vncipherlast 2, 5, 17
+0x10 0x45 0x8d 0x49
+
+# CHECK: vpmsumb 2, 5, 17
+0x10 0x45 0x8c 0x08
+
+# CHECK: vpmsumh 2, 5, 17
+0x10 0x45 0x8c 0x48
+
+# CHECK: vpmsumw 2, 5, 17
+0x10 0x45 0x8c 0x88
+
+# CHECK: vpmsumd 2, 5, 17
+0x10 0x45 0x8c 0xc8
+
+# CHECK: vshasigmaw 2, 3, 0, 11
+0x10 0x43 0x5e 0x82
+
+# CHECK: vshasigmad 2, 3, 1, 15
+0x10 0x43 0xfe 0xc2
+
 # CHECK: vsel 2, 3, 4, 5
 0x10 0x43 0x21 0x6a
 
diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s
index 7267f34186d..8f7e91903af 100644
--- a/test/MC/PowerPC/ppc64-encoding-vmx.s
+++ b/test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -133,6 +133,55 @@
 # CHECK-BE: vperm 2, 3, 4, 5           # encoding:
[0x10,0x43,0x21,0x6b] # CHECK-LE: vperm 2, 3, 4, 5 # encoding: [0x6b,0x21,0x43,0x10] vperm 2, 3, 4, 5 + +# CHECK-BE: vpermxor 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6d] +# CHECK-LE: vpermxor 2, 3, 4, 5 # encoding: [0x6d,0x21,0x43,0x10] + vpermxor 2, 3, 4, 5 + +# CHECK-BE: vsbox 2, 5 # encoding: [0x10,0x45,0x05,0xc8] +# CHECK-LE: vsbox 2, 5 # encoding: [0xc8,0x05,0x45,0x10] + vsbox 2, 5 + +# CHECK-BE: vcipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x08] +# CHECK-LE: vcipher 2, 5, 17 # encoding: [0x08,0x8d,0x45,0x10] + vcipher 2, 5, 17 + +# CHECK-BE: vcipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x09] +# CHECK-LE: vcipherlast 2, 5, 17 # encoding: [0x09,0x8d,0x45,0x10] + vcipherlast 2, 5, 17 + +# CHECK-BE: vncipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x48] +# CHECK-LE: vncipher 2, 5, 17 # encoding: [0x48,0x8d,0x45,0x10] + vncipher 2, 5, 17 + +# CHECK-BE: vncipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x49] +# CHECK-LE: vncipherlast 2, 5, 17 # encoding: [0x49,0x8d,0x45,0x10] + vncipherlast 2, 5, 17 + +# CHECK-BE: vpmsumb 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x08] +# CHECK-LE: vpmsumb 2, 5, 17 # encoding: [0x08,0x8c,0x45,0x10] + vpmsumb 2, 5, 17 + +# CHECK-BE: vpmsumh 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x48] +# CHECK-LE: vpmsumh 2, 5, 17 # encoding: [0x48,0x8c,0x45,0x10] + vpmsumh 2, 5, 17 + +# CHECK-BE: vpmsumw 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x88] +# CHECK-LE: vpmsumw 2, 5, 17 # encoding: [0x88,0x8c,0x45,0x10] + vpmsumw 2, 5, 17 + +# CHECK-BE: vpmsumd 2, 5, 17 # encoding: [0x10,0x45,0x8c,0xc8] +# CHECK-LE: vpmsumd 2, 5, 17 # encoding: [0xc8,0x8c,0x45,0x10] + vpmsumd 2, 5, 17 + +# CHECK-BE: vshasigmaw 2, 3, 0, 11 # encoding: [0x10,0x43,0x5e,0x82] +# CHECK-LE: vshasigmaw 2, 3, 0, 11 # encoding: [0x82,0x5e,0x43,0x10] + vshasigmaw 2, 3, 0, 11 + +# CHECK-BE: vshasigmad 2, 3, 1, 15 # encoding: [0x10,0x43,0xfe,0xc2] +# CHECK-LE: vshasigmad 2, 3, 1, 15 # encoding: [0xc2,0xfe,0x43,0x10] + vshasigmad 2, 3, 1, 15 + # CHECK-BE: vsel 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6a] # CHECK-LE: vsel 2, 3, 4, 5 # encoding: [0x6a,0x21,0x43,0x10] vsel 2, 3, 4, 5 -- 2.34.1
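
Usage sketch (illustrative, not part of the commit): with llc built from this
revision, the new intrinsics can be exercised straight from IR in the same
style as test/CodeGen/PowerPC/crypto_bifs.ll above. The file name, function
name, and CHECK lines below are made up for the example; only the intrinsic
declarations are taken from the patch. Passing the operands as function
arguments side-steps the dead-store issue noted in the test's FIXME, so the
crypto instructions are emitted back to back and followed directly by blr.

; aes_rounds.ll (hypothetical)
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s

; One AES encryption round followed by the final round, all in VMX registers.
define <2 x i64> @aes_two_rounds(<2 x i64> %state, <2 x i64> %rk0,
                                 <2 x i64> %rk1) nounwind {
entry:
  %r0 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %state,
                                                        <2 x i64> %rk0)
  %r1 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %r0,
                                                            <2 x i64> %rk1)
  ret <2 x i64> %r1
; CHECK: vcipher 2,
; CHECK: vcipherlast 2,
; CHECK-NEXT: blr
}

declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>)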