Skip to content

Commit

Permalink
This patch contains support for encoding FMA4 instructions and
Browse files Browse the repository at this point in the history
tablegen patterns for scalar FMA4 operations and intrinsic. Also
add tests for vfmaddsd.

Patch by Jan Sjodin

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145133 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
bcardosolopes committed Nov 25, 2011
1 parent 70aaf37 commit 1b9b377
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 7 deletions.
10 changes: 10 additions & 0 deletions include/llvm/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1821,6 +1821,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
}

//===----------------------------------------------------------------------===//
// FMA4

let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// MMX

Expand Down
10 changes: 9 additions & 1 deletion lib/Target/X86/MCTargetDesc/X86BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,12 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
Has3DNow0F0FOpcode = 1U << 7
Has3DNow0F0FOpcode = 1U << 7,

/// XOP_W - Same bit as VEX_W. Used to indicate swapping of
/// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used
/// for FMA4 and XOP instructions.
XOP_W = 1U << 8
};

// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
Expand Down Expand Up @@ -488,9 +493,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
if (HasXOP_W)
++FirstMemOp;// Skip the register source (which is encoded in I8IMM).

// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
Expand Down
34 changes: 29 additions & 5 deletions lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;

// XOP_W: opcode specific, same bit as VEX_W, but used to
// swap operand 3 and 4 for FMA4 and XOP instructions
unsigned char XOP_W = 0;

// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
Expand Down Expand Up @@ -453,6 +457,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;

if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
XOP_W = 1;

if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;

Expand Down Expand Up @@ -529,6 +536,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
// dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;

Expand Down Expand Up @@ -629,7 +639,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 3 byte VEX prefix
EmitByte(0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
}

/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
Expand Down Expand Up @@ -889,6 +899,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned XOP_W_I8IMMOperand = 2;

// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
Expand Down Expand Up @@ -961,6 +973,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;

// GAS sets the XOP_W even with register operands, we want to match this.
// XOP_W is ignored, so there is no swapping of the operands
XOP_W_I8IMMOperand = 3;

EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
Expand All @@ -975,14 +991,20 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
if(HasXOP_W) // Skip second register source (encoded in I8IMM)
++FirstMemOp;

EmitByte(BaseOpcode, CurByte, OS);

EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
if (HasVEX_4VOp3)
++CurOp;
if(HasXOP_W) {
CurOp = NumOps - 1; // We have consumed all except one operand (third)
} else {
CurOp += AddrOperands + 1;
if (HasVEX_4VOp3)
++CurOp;
}
break;
}

Expand Down Expand Up @@ -1064,7 +1086,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
: CurOp);
CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
Expand Down
39 changes: 39 additions & 0 deletions lib/Target/X86/X86InstrFMA.td
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,42 @@ let isAsmParserOnly = 1 in {
defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
}

//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//


multiclass fma4s<bits<8> opc, string OpcodeStr> {
def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src2, $src3, $src1, $dst|$dst, $src1, $src3, $src2}"),
[]>, XOP_W;
def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_W;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>;

}

let isAsmParserOnly = 1 in {
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
}

// FMA4 Intrinsics patterns

def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
(VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
(alignedloadv2f64 addr:$src3)),
(VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
VR128:$src3),
(VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
10 changes: 9 additions & 1 deletion lib/Target/X86/X86InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }

class XOP_W { bit hasXOP_WPrefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
Expand Down Expand Up @@ -158,6 +158,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands

// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
Expand All @@ -179,6 +180,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38} = hasVEX_L;
let TSFlags{39} = ignoresVEX_L;
let TSFlags{40} = has3DNow0F0FOpcode;
let TSFlags{41} = hasXOP_WPrefix;
}

class PseudoI<dag oops, dag iops, list<dag> pattern>
Expand Down Expand Up @@ -496,6 +498,12 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;

// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;

// X86-64 Instruction templates...
//

Expand Down
9 changes: 9 additions & 0 deletions test/CodeGen/X86/fma4-intrinsics-x86_64.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=fma4 | FileCheck %s

define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddsd
%res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
ret < 2 x double > %res
}
declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone

13 changes: 13 additions & 0 deletions test/MC/X86/x86_64-fma4-encoding.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s

// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0

// CHECK: vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
vfmaddsd %xmm1, (%rcx),%xmm0, %xmm0

// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0

0 comments on commit 1b9b377

Please sign in to comment.