Skip to content

Commit

Permalink
release/18.x: [SystemZ] Fix overflow flag for i128 USUBO (#86491)
Browse files Browse the repository at this point in the history
We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement
USUBO/USUBO_CARRY for the i128 data type. However, these instructions
use an inverted sense of the borrow indication flag (a value of 1
indicates *no* borrow, while a value of 0 indicates borrow). This does
not match the semantics of the boolean "overflow" flag of the
USUBO/USUBO_CARRY ISD nodes.

Fix this by generating code to explicitly invert the flag. These inversions
cancel out if the result of a USUBO feeds into a USUBO_CARRY.

To avoid unnecessary zero-extend operations, also improve the DAGCombine
handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where
appropriate.

Fixes: #83268
  • Loading branch information
uweigand committed Mar 27, 2024
1 parent 767b61c commit cfaeee6
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 0 deletions.
34 changes: 34 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4252,6 +4252,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
if (N->getValueType(0) == MVT::i128) {
unsigned BaseOp = 0;
unsigned FlagOp = 0;
bool IsBorrow = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO:
Expand All @@ -4261,13 +4262,17 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
case ISD::USUBO:
BaseOp = ISD::SUB;
FlagOp = SystemZISD::VSCBI;
IsBorrow = true;
break;
}
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
if (IsBorrow)
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
}

Expand Down Expand Up @@ -4340,6 +4345,7 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
if (VT == MVT::i128) {
unsigned BaseOp = 0;
unsigned FlagOp = 0;
bool IsBorrow = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO_CARRY:
Expand All @@ -4349,14 +4355,21 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
case ISD::USUBO_CARRY:
BaseOp = SystemZISD::VSBI;
FlagOp = SystemZISD::VSBCBI;
IsBorrow = true;
break;
}
if (IsBorrow)
Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
Carry, DAG.getConstant(1, DL, Carry.getValueType()));
Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
if (IsBorrow)
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
}

Expand Down Expand Up @@ -6611,6 +6624,27 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
return NewSelect;
}
}
// Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
// of the result is smaller than the size of X and all the truncated bits
// of X are already zero.
if (N0.getOpcode() == ISD::XOR &&
N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue X = N0.getOperand(0).getOperand(0);
if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
KnownBits Known = DAG.computeKnownBits(X);
APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
N0.getValueSizeInBits(),
VT.getSizeInBits());
if (TruncatedBits.isSubsetOf(Known.Zero)) {
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
X, DAG.getConstant(Mask, SDLoc(N0), VT));
}
}
}
return SDValue();
}

Expand Down
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/SystemZ/int-usub-12.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ define zeroext i1 @f1(i128 %a, i128 %b, ptr %res) {
; CHECK-NEXT: vscbiq %v2, %v1, %v0
; CHECK-NEXT: vlgvg %r2, %v2, 1
; CHECK-NEXT: vsq %v0, %v1, %v0
; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: vst %v0, 0(%r4), 3
; CHECK-NEXT: br %r14
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
Expand All @@ -27,6 +28,7 @@ define zeroext i1 @f2(i128 %a, i128 %b) {
; CHECK-NEXT: vl %v1, 0(%r2), 3
; CHECK-NEXT: vscbiq %v0, %v1, %v0
; CHECK-NEXT: vlgvg %r2, %v0, 1
; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: br %r14
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
%obit = extractvalue {i128, i1} %t, 1
Expand All @@ -46,5 +48,25 @@ define i128 @f3(i128 %a, i128 %b) {
ret i128 %val
}

define i128 @f4(i128 %a, i128 %b) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vscbiq %v2, %v1, %v0
; CHECK-NEXT: vlgvf %r0, %v2, 3
; CHECK-NEXT: vgbm %v2, 0
; CHECK-NEXT: xilf %r0, 1
; CHECK-NEXT: jl .LBB3_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: vsq %v2, %v1, %v0
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: vst %v2, 0(%r2), 3
; CHECK-NEXT: br %r14
%val = call i128 @llvm.usub.sat.i128(i128 %a, i128 %b)
ret i128 %val
}

declare {i128, i1} @llvm.usub.with.overflow.i128(i128, i128) nounwind readnone
declare i128 @llvm.usub.sat.i128(i128, i128) nounwind readnone

2 changes: 2 additions & 0 deletions llvm/test/CodeGen/SystemZ/int-usub-13.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) {
; CHECK-NEXT: vlgvg %r2, %v5, 1
; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4
; CHECK-NEXT: vsq %v1, %v3, %v2
; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: vst %v1, 16(%r4), 3
; CHECK-NEXT: vst %v0, 0(%r4), 3
; CHECK-NEXT: br %r14
Expand All @@ -35,6 +36,7 @@ define zeroext i1 @f2(i256 %a, i256 %b) {
; CHECK-NEXT: vscbiq %v2, %v3, %v2
; CHECK-NEXT: vsbcbiq %v0, %v1, %v0, %v2
; CHECK-NEXT: vlgvg %r2, %v0, 1
; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: br %r14
%t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b)
%obit = extractvalue {i256, i1} %t, 1
Expand Down

0 comments on commit cfaeee6

Please sign in to comment.