Skip to content

Commit

Permalink
[ARMv8] Add support for the v8 cryptography extensions.
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190996 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
aemerson committed Sep 19, 2013
1 parent adadf88 commit 5df37da
Show file tree
Hide file tree
Showing 16 changed files with 459 additions and 17 deletions.
17 changes: 17 additions & 0 deletions include/llvm/IR/IntrinsicsARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -466,4 +466,21 @@ def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;


// Crypto instructions
def int_arm_neon_aesd : Neon_2Arg_Intrinsic;
def int_arm_neon_aese : Neon_2Arg_Intrinsic;
def int_arm_neon_aesimc : Neon_1Arg_Intrinsic;
def int_arm_neon_aesmc : Neon_1Arg_Intrinsic;
def int_arm_neon_sha1h : Neon_1Arg_Intrinsic;
def int_arm_neon_sha1su1 : Neon_2Arg_Intrinsic;
def int_arm_neon_sha256su0 : Neon_2Arg_Intrinsic;
def int_arm_neon_sha1c : Neon_3Arg_Intrinsic;
def int_arm_neon_sha1m : Neon_3Arg_Intrinsic;
def int_arm_neon_sha1p : Neon_3Arg_Intrinsic;
def int_arm_neon_sha1su0: Neon_3Arg_Intrinsic;
def int_arm_neon_sha256h: Neon_3Arg_Intrinsic;
def int_arm_neon_sha256h2: Neon_3Arg_Intrinsic;
def int_arm_neon_sha256su1: Neon_3Arg_Intrinsic;

} // end TargetPrefix
3 changes: 3 additions & 0 deletions lib/Target/ARM/ARM.td
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable support for Cryptography extensions",
[FeatureNEON]>;

// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
Expand Down
4 changes: 2 additions & 2 deletions lib/Target/ARM/ARMInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -2015,7 +2015,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
}

// Same as N2V but not predicated.
class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
Expand All @@ -2032,7 +2032,7 @@ class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
// Encode constant bits
let Inst{27-23} = 0b00111;
let Inst{21-20} = 0b11;
let Inst{19-18} = 0b10;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
let Inst{11} = 0;
let Inst{10-8} = op10_8;
Expand Down
2 changes: 2 additions & 0 deletions lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
Expand Down
111 changes: 102 additions & 9 deletions lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -2355,17 +2355,36 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
: N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
: N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
Expand Down Expand Up @@ -2534,7 +2553,7 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

Expand Down Expand Up @@ -2592,6 +2611,19 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
Dt, ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
Expand Down Expand Up @@ -2842,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}

class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
Expand Down Expand Up @@ -2897,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
Expand Down Expand Up @@ -4078,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "s", NEONvmulls, 1>;
defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
DecoderNamespace = "NEONData" in {
defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "s", NEONvmulls, 1>;
defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
"vmull", "u", NEONvmullu, 1>;
def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
"vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

Expand Down Expand Up @@ -5818,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
DecoderNamespace = "v8Crypto" in {
class AES<string op, bit op7, bit op6, SDPatternOperator Int>
: N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
!strconcat("aes", op), "8", v16i8, v16i8, Int>,
Requires<[HasV8, HasCrypto]>;
class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
: N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
!strconcat("aes", op), "8", v16i8, v16i8, Int>,
Requires<[HasV8, HasCrypto]>;
class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
SDPatternOperator Int>
: N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int>,
Requires<[HasV8, HasCrypto]>;
class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
SDPatternOperator Int>
: N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int>,
Requires<[HasV8, HasCrypto]>;
class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
: N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
!strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ void ARMSubtarget::initializeEnvironment() {
FPOnlySP = false;
HasPerfMon = false;
HasTrustZone = false;
HasCrypto = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// HasTrustZone - if true, processor supports TrustZone security extensions
bool HasTrustZone;

/// HasCrypto - if true, processor supports Cryptography extensions
bool HasCrypto;

/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
Expand Down Expand Up @@ -248,6 +251,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool hasVFP4() const { return HasVFPv4; }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }

Expand Down
13 changes: 8 additions & 5 deletions lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ class ARMAsmParser : public MCTargetAsmParser {
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
StringRef &ITMask);
void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);

bool isThumb() const {
Expand Down Expand Up @@ -4784,8 +4785,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode) {
getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
Mnemonic == "add" || Mnemonic == "adc" ||
Expand All @@ -4808,7 +4809,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
Mnemonic == "vrintm") {
Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
Expand Down Expand Up @@ -5068,7 +5071,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);

// If we had a carry-set on an instruction that can't do that, issue an
// error.
Expand Down
20 changes: 20 additions & 0 deletions lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return result;
}

MI.clear();
result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
}

MI.clear();
Size = 0;
return MCDisassembler::Fail;
Expand Down Expand Up @@ -825,6 +833,18 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}

MI.clear();
uint32_t NEONCryptoInsn = insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
Address, this, STI);
if (result != MCDisassembler::Fail) {
Size = 4;
return result;
}

MI.clear();
uint32_t NEONv8Insn = insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
Expand Down
57 changes: 57 additions & 0 deletions test/CodeGen/ARM/intrinsics-crypto.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s

define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
%tmp = load <16 x i8>* %a
%tmp2 = load <16 x i8>* %b
%tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
%tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
%tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
%tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
ret <16 x i8> %tmp6
}

define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) {
%tmp = load <4 x i32>* %a
%tmp2 = load <4 x i32>* %b
%tmp3 = load <4 x i32>* %c
%res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
%res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
%res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
%res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
ret <4 x i32> %res10
}

declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
Loading

0 comments on commit 5df37da

Please sign in to comment.