[x86] Handle more cases where we can re-use an atomic operation's flags
rather than doing a separate comparison.

This both saves an explicit comparison and avoids the use of `xadd`,
which introduces register constraints and other challenges for the
generated code.

The motivating case comes from atomic reference counts where `1` is the
sentinel rather than `0` for whatever reason. This can and should be
lowered efficiently on x86 by just using a different flag; however, the
x86 code previously only handled the `0` case.
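
As a concrete illustration, here is a minimal sketch of that pattern (the `RefCounted` type and `release` helper are hypothetical examples, not code from this commit):

#include <atomic>

struct RefCounted {
  std::atomic<long> RefCount{1};
};

void release(RefCounted *Obj) {
  // fetch_sub returns the value held *before* the decrement, so the last
  // owner observes 1 rather than 0. With this change, the decrement and the
  // comparison lower to `lock decq` + `sete` instead of `lock xaddq`
  // followed by an explicit `cmpq`.
  if (Obj->RefCount.fetch_sub(1, std::memory_order_seq_cst) == 1)
    delete Obj;
}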

There remain some further opportunities here that are currently hidden
due to canonicalization. I've included test cases that show these, with
FIXMEs (see the sketch after the test diff below). However, I don't at
the moment have any production use cases, and they seem substantially
harder to address.

Differential Revision: https://reviews.llvm.org/D36945

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311317 91177308-0d34-0410-b5e6-96231b3b80d8
chandlerc committed Aug 21, 2017
1 parent 0472b1c commit 8f31059
Showing 2 changed files with 116 additions and 6 deletions.
36 changes: 30 additions & 6 deletions lib/Target/X86/X86ISelLowering.cpp
@@ -30732,12 +30732,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (!CmpLHS.hasOneUse())
     return SDValue();
 
-  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
-    return SDValue();
-
-  const unsigned Opc = CmpLHS.getOpcode();
-
+  unsigned Opc = CmpLHS.getOpcode();
   if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
     return SDValue();

@@ -30750,6 +30745,35 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
+  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
+  if (!CmpRHSC)
+    return SDValue();
+
+  APInt Comparison = CmpRHSC->getAPIntValue();
+
+  // If the addend is the negation of the comparison value, then we can do
+  // a full comparison by emitting the atomic arithmetic as a locked sub.
+  if (Comparison == -Addend) {
+    // The CC is fine, but we need to rewrite the LHS of the comparison as an
+    // atomic sub.
+    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
+    auto AtomicSub = DAG.getAtomic(
+        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpLHS.getValueType(),
+        /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
+        /*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
+        AN->getMemOperand());
+    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG);
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
+                                  DAG.getUNDEF(CmpLHS.getValueType()));
+    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
+    return LockOp;
+  }
+
+  // We can handle comparisons with zero in a number of cases by manipulating
+  // the CC used.
+  if (!Comparison.isNullValue())
+    return SDValue();
+
   if (CC == X86::COND_S && Addend == 1)
     CC = X86::COND_LE;
   else if (CC == X86::COND_NS && Addend == 1)
86 changes: 86 additions & 0 deletions test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,4 +192,90 @@ entry:
ret i8 %s2
}

define i8 @test_sub_1_cmp_1_setcc_eq(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_cmp_1_setcc_eq:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
%tmp1 = icmp eq i64 %tmp0, 1
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

define i8 @test_sub_1_cmp_1_setcc_ne(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_cmp_1_setcc_ne:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
%tmp1 = icmp ne i64 %tmp0, 1
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

define i8 @test_sub_1_cmp_1_setcc_ugt(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_cmp_1_setcc_ugt:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: seta %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
%tmp1 = icmp ugt i64 %tmp0, 1
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

; FIXME: This test canonicalizes in a way that hides the fact that the
; comparison can be folded into the atomic subtract.
define i8 @test_sub_1_cmp_1_setcc_sle(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_cmp_1_setcc_sle:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: cmpq $2, %rax
; CHECK-NEXT: setl %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
%tmp1 = icmp sle i64 %tmp0, 1
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

define i8 @test_sub_3_cmp_3_setcc_eq(i64* %p) #0 {
; CHECK-LABEL: test_sub_3_cmp_3_setcc_eq:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: lock subq $3, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
%tmp1 = icmp eq i64 %tmp0, 3
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

; FIXME: This test canonicalizes in a way that hides the fact that the
; comparison can be folded into the atomic subtract.
define i8 @test_sub_3_cmp_3_setcc_uge(i64* %p) #0 {
; CHECK-LABEL: test_sub_3_cmp_3_setcc_uge:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq $-3, %rax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: cmpq $2, %rax
; CHECK-NEXT: seta %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst
%tmp1 = icmp uge i64 %tmp0, 3
%tmp2 = zext i1 %tmp1 to i8
ret i8 %tmp2
}

attributes #0 = { nounwind }
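
The FIXME cases above correspond, at the source level, to an ordered comparison against the same sentinel. A minimal sketch of the `sle` case (the `at_or_below_sentinel` helper is hypothetical, not part of this commit):

#include <atomic>

bool at_or_below_sentinel(std::atomic<long> &Count) {
  // The signed `<= 1` test is canonicalized to `< 2` before the combine can
  // match it (visible as `cmpq $2` / `setl` in the CHECK lines above), so the
  // comparison constant no longer equals the negated addend and the fold is
  // skipped; the decrement still goes through `lock xaddq`.
  return Count.fetch_sub(1, std::memory_order_seq_cst) <= 1;
}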
