[X86] Fix handling of maskmovdqu in X32
The maskmovdqu instruction is an odd one: it has a 32-bit and a 64-bit
variant, the former using EDI and the latter RDI as the implicit
address register. In 64-bit mode, a 0x67 prefix can be used to get the
EDI version, but there is no way to express this in assembly as a
single instruction; the only way is with an explicit addr32 prefix.

This change adds support for the EDI form in 64-bit mode. When
generating assembly text, the explicit addr32 prefix is emitted. When
emitting machine code directly, the instruction is kept as a single
instruction and encoded with the 0x67 prefix. When parsing assembly
text, it is re-parsed as ADDR32 followed by MASKMOVDQU64, which still
produces the correct bytes when converted to machine code.

The same applies to vmaskmovdqu.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D103427
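
For illustration only (not part of this patch), here is a minimal C++
sketch of the SSE2 intrinsic that lowers to (v)maskmovdqu; the function
name store_masked is made up for this example. The destination is an
ordinary pointer at the source level, but the instruction itself only
reads the address from EDI/RDI, which is why the x32 (ILP32) case needs
the addr32 / 0x67 prefix:

    // Sketch: _mm_maskmoveu_si128 compiles to (v)maskmovdqu. The pointer is
    // an explicit argument here, but the instruction takes it implicitly
    // from EDI (32-bit addressing) or RDI (64-bit addressing).
    #include <emmintrin.h>

    void store_masked(__m128i data, __m128i mask, char *dst) {
      // Bytes of 'data' whose corresponding mask byte has its high bit set
      // are stored to 'dst'; on an x32 target the pointer is 32 bits wide,
      // so codegen must use the EDI form (addr32 (v)maskmovdqu).
      _mm_maskmoveu_si128(data, mask, dst);
    }

The IR tests below exercise the same operation through the
llvm.x86.sse2.maskmov.dqu intrinsic.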
hvdijk committed Jul 15, 2021
1 parent 81ce3aa commit a8ad917
Showing 10 changed files with 1,389 additions and 20 deletions.
2 changes: 2 additions & 0 deletions llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -116,6 +116,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \
ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1119,6 +1119,8 @@ static int getInstructionID(struct InternalInstruction *insn,
switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -1175,6 +1177,8 @@ static int getInstructionID(struct InternalInstruction *insn,
case 0x66:
if (insn->mode != MODE_16BIT)
attrMask |= ATTR_OPSIZE;
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
break;
case 0x67:
attrMask |= ATTR_ADSIZE;
20 changes: 18 additions & 2 deletions llvm/lib/Target/X86/X86InstrSSE.td
@@ -4011,7 +4011,15 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
VEX, VEX_WIG;
VEX, VEX_WIG, AdSize64;
let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask), "",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
VEX, VEX_WIG, AdSize32 {
let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
let AsmVariantName = "NonParsable";
}

let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4020,7 +4028,15 @@ def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
AdSize64;
let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
AdSize32 {
let AsmVariantName = "NonParsable";
}

} // ExeDomain = SSEPackedInt

4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -835,8 +835,8 @@ def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JAL
let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
let NumMicroOps = 63;
}
def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
VMASKMOVDQU, VMASKMOVDQU64)>;
def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/X86/maskmovdqu.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=i686_SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_64_SSE2
; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_x32_SSE2
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=i686_AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=x86_64_AVX
; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+avx | FileCheck %s --check-prefix=x86_x32_AVX
; rdar://6573467

define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
Expand All @@ -20,6 +22,13 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_SSE2-NEXT: maskmovdqu %xmm1, %xmm0
; x86_64_SSE2-NEXT: retq
;
; x86_x32_SSE2-LABEL: test:
; x86_x32_SSE2: # %bb.0: # %entry
; x86_x32_SSE2-NEXT: movq %rsi, %rdi
; x86_x32_SSE2-NEXT: # kill: def $edi killed $edi killed $rdi
; x86_x32_SSE2-NEXT: addr32 maskmovdqu %xmm1, %xmm0
; x86_x32_SSE2-NEXT: retq
;
; i686_AVX-LABEL: test:
; i686_AVX: # %bb.0: # %entry
; i686_AVX-NEXT: pushl %edi
@@ -33,6 +42,12 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_AVX-NEXT: movq %rsi, %rdi
; x86_64_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0
; x86_64_AVX-NEXT: retq
; x86_x32_AVX-LABEL: test:
; x86_x32_AVX: # %bb.0: # %entry
; x86_x32_AVX-NEXT: movq %rsi, %rdi
; x86_x32_AVX-NEXT: # kill: def $edi killed $edi killed $rdi
; x86_x32_AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0
; x86_x32_AVX-NEXT: retq
entry:
tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
ret void
