-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[ScalarEvolutionExpander] Use IRBuilder::CreateBinOp in SCEVExpander::InsertBinop. #146443
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
[ScalarEvolutionExpander] Use IRBuilder::CreateBinOp in SCEVExpander::InsertBinop. Removes the shift pair from add1_3 in llvm#146241.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: Craig Topper (topperc). Changes: Removes the shift pair from add1_3 in #146241. I think there may be some regressions in here. Patch is 24.13 KiB, truncated to 20.00 KiB below (full version: https://github.com/llvm/llvm-project/pull/146443.diff). 12 files affected:
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 24fe08d6c3e4e..a8caa08d4fdb0 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -315,14 +315,14 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- // TODO: Use the Builder, which will make CreateBinOp below fold with
- // InstSimplifyFolder.
- Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
- BO->setDebugLoc(Loc);
- if (Flags & SCEV::FlagNUW)
- BO->setHasNoUnsignedWrap();
- if (Flags & SCEV::FlagNSW)
- BO->setHasNoSignedWrap();
+ Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS);
+ if (auto *BOI = dyn_cast<Instruction>(BO)) {
+ BOI->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BOI->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BOI->setHasNoSignedWrap();
+ }
return BO;
}
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 163124c0d2757..43db4aa032d1a 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -151,7 +151,7 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: .LBB4_3: // %LI
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB4_6 Depth 2
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov x21, xzr
; CHECK-NEXT: add x23, x22, #1
; CHECK-NEXT: b .LBB4_6
; CHECK-NEXT: .LBB4_4: // %if.else
@@ -159,16 +159,14 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
; CHECK-NEXT: .LBB4_5: // %LJ.latch
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
-; CHECK-NEXT: add x8, x21, #1
+; CHECK-NEXT: cmp x21, x19
; CHECK-NEXT: str w0, [x20, x21, lsl #2]
-; CHECK-NEXT: sub x9, x8, #1
-; CHECK-NEXT: cmp x9, x19
+; CHECK-NEXT: add x21, x21, #1
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_6: // %LJ
; CHECK-NEXT: // Parent Loop BB4_3 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
-; CHECK-NEXT: mov x21, x8
-; CHECK-NEXT: ldr w8, [x20, x8, lsl #2]
+; CHECK-NEXT: ldr w8, [x20, x21, lsl #2]
; CHECK-NEXT: tbz w8, #31, .LBB4_4
; CHECK-NEXT: // %bb.7: // %if.then
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
index 8e8934b6e9599..da72ddd81350d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
@@ -96,17 +96,18 @@ exit:
define void @cbz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB3_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
-; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr.w r4, [r0, r3, lsl #2]
+; CHECK-NEXT: mov r2, r3
+; CHECK-NEXT: adds r3, #1
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
@@ -126,17 +127,18 @@ exit:
define void @cbnz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbnz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB4_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
-; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr.w r4, [r0, r3, lsl #2]
+; CHECK-NEXT: mov r2, r3
+; CHECK-NEXT: adds r3, #1
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: beq .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index 6943622fac7f2..6df19767b2bb4 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -1308,28 +1308,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-LINUX-NEXT: .LBB13_1: # %inner_loop
; SSE-LINUX-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-LINUX-NEXT: movq %rcx, %r8
-; SSE-LINUX-NEXT: shrq $6, %r8
-; SSE-LINUX-NEXT: movq (%rsi,%r8,8), %r8
-; SSE-LINUX-NEXT: btq %rcx, %r8
-; SSE-LINUX-NEXT: leaq 1(%rcx), %rcx
+; SSE-LINUX-NEXT: shrq $6, %rcx
+; SSE-LINUX-NEXT: movq (%rsi,%rcx,8), %r9
+; SSE-LINUX-NEXT: leaq 1(%r8), %rcx
+; SSE-LINUX-NEXT: btq %r8, %r9
; SSE-LINUX-NEXT: jae .LBB13_1
; SSE-LINUX-NEXT: # %bb.2: # %loop_end
; SSE-LINUX-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-LINUX-NEXT: leaq 1(%rax), %r8
+; SSE-LINUX-NEXT: leaq 1(%rax), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
; SSE-LINUX-NEXT: movapd %xmm0, %xmm5
; SSE-LINUX-NEXT: subsd %xmm4, %xmm5
; SSE-LINUX-NEXT: mulsd %xmm1, %xmm5
-; SSE-LINUX-NEXT: leaq -1(%rcx), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
; SSE-LINUX-NEXT: mulsd %xmm2, %xmm4
; SSE-LINUX-NEXT: addsd %xmm5, %xmm4
; SSE-LINUX-NEXT: divsd %xmm3, %xmm4
; SSE-LINUX-NEXT: movsd %xmm4, -8(%rdi,%rax,8)
-; SSE-LINUX-NEXT: movq %r8, %rax
-; SSE-LINUX-NEXT: cmpq %r8, %rdx
+; SSE-LINUX-NEXT: movq %r9, %rax
+; SSE-LINUX-NEXT: cmpq %r9, %rdx
; SSE-LINUX-NEXT: jge .LBB13_1
; SSE-LINUX-NEXT: # %bb.3: # %loopdone
; SSE-LINUX-NEXT: retq
@@ -1380,28 +1379,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-WIN-NEXT: .LBB13_1: # %inner_loop
; SSE-WIN-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-WIN-NEXT: movq %r9, %r10
-; SSE-WIN-NEXT: shrq $6, %r10
-; SSE-WIN-NEXT: movq (%rdx,%r10,8), %r10
-; SSE-WIN-NEXT: btq %r9, %r10
-; SSE-WIN-NEXT: leaq 1(%r9), %r9
+; SSE-WIN-NEXT: shrq $6, %r9
+; SSE-WIN-NEXT: movq (%rdx,%r9,8), %r11
+; SSE-WIN-NEXT: leaq 1(%r10), %r9
+; SSE-WIN-NEXT: btq %r10, %r11
; SSE-WIN-NEXT: jae .LBB13_1
; SSE-WIN-NEXT: # %bb.2: # %loop_end
; SSE-WIN-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-WIN-NEXT: leaq 1(%r8), %r10
+; SSE-WIN-NEXT: leaq 1(%r8), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
; SSE-WIN-NEXT: movapd %xmm2, %xmm5
; SSE-WIN-NEXT: subsd %xmm4, %xmm5
; SSE-WIN-NEXT: mulsd %xmm3, %xmm5
-; SSE-WIN-NEXT: leaq -1(%r9), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
; SSE-WIN-NEXT: mulsd %xmm1, %xmm4
; SSE-WIN-NEXT: addsd %xmm5, %xmm4
; SSE-WIN-NEXT: divsd %xmm0, %xmm4
; SSE-WIN-NEXT: movsd %xmm4, -8(%rcx,%r8,8)
-; SSE-WIN-NEXT: movq %r10, %r8
-; SSE-WIN-NEXT: cmpq %r10, %rax
+; SSE-WIN-NEXT: movq %r11, %r8
+; SSE-WIN-NEXT: cmpq %r11, %rax
; SSE-WIN-NEXT: jge .LBB13_1
; SSE-WIN-NEXT: # %bb.3: # %loopdone
; SSE-WIN-NEXT: movaps (%rsp), %xmm7 # 16-byte Reload
@@ -1465,25 +1463,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX1-NEXT: .LBB13_1: # %inner_loop
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
; AVX1-NEXT: movq %r9, %r10
-; AVX1-NEXT: shrq $6, %r10
-; AVX1-NEXT: movq (%rdx,%r10,8), %r10
-; AVX1-NEXT: btq %r9, %r10
-; AVX1-NEXT: leaq 1(%r9), %r9
+; AVX1-NEXT: shrq $6, %r9
+; AVX1-NEXT: movq (%rdx,%r9,8), %r11
+; AVX1-NEXT: leaq 1(%r10), %r9
+; AVX1-NEXT: btq %r10, %r11
; AVX1-NEXT: jae .LBB13_1
; AVX1-NEXT: # %bb.2: # %loop_end
; AVX1-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX1-NEXT: leaq 1(%r8), %r10
-; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX1-NEXT: leaq 1(%r8), %r11
+; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX1-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX1-NEXT: leaq -1(%r9), %r11
-; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX1-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX1-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX1-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX1-NEXT: movq %r10, %r8
-; AVX1-NEXT: cmpq %r10, %rax
+; AVX1-NEXT: movq %r11, %r8
+; AVX1-NEXT: cmpq %r11, %rax
; AVX1-NEXT: jge .LBB13_1
; AVX1-NEXT: # %bb.3: # %loopdone
; AVX1-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
@@ -1547,25 +1544,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX512VL-NEXT: .LBB13_1: # %inner_loop
; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512VL-NEXT: movq %r9, %r10
-; AVX512VL-NEXT: shrq $6, %r10
-; AVX512VL-NEXT: movq (%rdx,%r10,8), %r10
-; AVX512VL-NEXT: btq %r9, %r10
-; AVX512VL-NEXT: leaq 1(%r9), %r9
+; AVX512VL-NEXT: shrq $6, %r9
+; AVX512VL-NEXT: movq (%rdx,%r9,8), %r11
+; AVX512VL-NEXT: leaq 1(%r10), %r9
+; AVX512VL-NEXT: btq %r10, %r11
; AVX512VL-NEXT: jae .LBB13_1
; AVX512VL-NEXT: # %bb.2: # %loop_end
; AVX512VL-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX512VL-NEXT: leaq 1(%r8), %r10
-; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX512VL-NEXT: leaq 1(%r8), %r11
+; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX512VL-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX512VL-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX512VL-NEXT: leaq -1(%r9), %r11
-; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX512VL-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX512VL-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX512VL-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX512VL-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX512VL-NEXT: movq %r10, %r8
-; AVX512VL-NEXT: cmpq %r10, %rax
+; AVX512VL-NEXT: movq %r11, %r8
+; AVX512VL-NEXT: cmpq %r11, %rax
; AVX512VL-NEXT: jge .LBB13_1
; AVX512VL-NEXT: # %bb.3: # %loopdone
; AVX512VL-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 097575ca86bcc..94d6b180c061a 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -13,9 +13,10 @@ define void @test1(ptr %s, i32 %n) {
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
@@ -52,9 +53,10 @@ define void @test2(ptr %struct, i32 %n) {
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
@@ -93,9 +95,10 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
@@ -146,10 +149,11 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: mov w20, wzr
+; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: mov w20, w9
; CHECK-NEXT: bl foo
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
@@ -160,9 +164,9 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: add w9, w20, #1
; CHECK-NEXT: str w20, [x8, #3140]
-; CHECK-NEXT: add w20, w20, #1
-; CHECK-NEXT: str w20, [x8, #3136]
+; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
@@ -221,9 +225,10 @@ define void @test5(ptr %s, i32 %n) {
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #2180]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #2176]
+; CHECK-NEXT: str w10, [x8, #2180]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index 7ae78ae6a1fd4..6a70fe48de7e2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -20,17 +20,16 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: leaq 1(%rcx), %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.3: # %bb8split
-; CHECK-NEXT: decq %rbx
-; CHECK-NEXT: .LBB0_4: # %bb8
-; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: .LBB0_3: # %bb8
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
index 89523b4487d49..b59ca63abfa41 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
@@ -19,8 +19,7 @@ define i8 @drop_nuw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -49,8 +48,7 @@ define i8 @drop_nsw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 127
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], 1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -79,8 +77,7 @@ define i8 @already_postinc() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
index 9c3698a740992..a46d75ae537ab 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
@@ -22,10 +22,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; DEFAULT: preheader:
; DEFAULT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; DEFAULT-NEXT: br label [[INNER_LOOP:%.*]]
@@ -87,10 +86,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; LIMIT: preheader:
; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; LIMIT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; LIMIT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; LIMIT-NEXT: br label [[INNER_LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
index 74d861cf5168c..d66905ae26f14 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
@@ -20,8 +20,7 @@ define i32 @test(i1 %c.1, ptr %src) {
; CHECK-NEXT: [[OR:%.*]] = or i1 [[P]], [[T]]
; CHECK-NEXT: [[ZEXT_OR:%.*]] = zext i1 [[OR]] to i32
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw i32 [[LSR_IV]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV_NEXT]], -1
-; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[TMP0]], -1050
+; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[LSR_IV]], -1050
; CHECK-NEXT: br i1 [[LOOP_HEADER_TERMCOND]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[ZEXT_OR_LCSSA:%.*]] = phi i32 [ [[ZEXT_OR]], [[LOOP_LATCH]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
index 502042eaf9b9c..007d84f9120e3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
@@ -16,9 +16,7 @@ define void @test1() {
; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef
; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
-; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7
-; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[VAL]], 7
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3
@@ -58,10 +56,8 @@ define void @test1() {
; CHECK: bb20.bb15splitsplitsplit_crit_edge:
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Craig Topper (topperc). Changes: Removes the shift pair from add1_3 in #146241. I think there may be some regressions in here. Patch is 24.13 KiB, truncated to 20.00 KiB below (full version: https://github.com/llvm/llvm-project/pull/146443.diff). 12 files affected:
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 24fe08d6c3e4e..a8caa08d4fdb0 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -315,14 +315,14 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- // TODO: Use the Builder, which will make CreateBinOp below fold with
- // InstSimplifyFolder.
- Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
- BO->setDebugLoc(Loc);
- if (Flags & SCEV::FlagNUW)
- BO->setHasNoUnsignedWrap();
- if (Flags & SCEV::FlagNSW)
- BO->setHasNoSignedWrap();
+ Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS);
+ if (auto *BOI = dyn_cast<Instruction>(BO)) {
+ BOI->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BOI->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BOI->setHasNoSignedWrap();
+ }
return BO;
}
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 163124c0d2757..43db4aa032d1a 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -151,7 +151,7 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: .LBB4_3: // %LI
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB4_6 Depth 2
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov x21, xzr
; CHECK-NEXT: add x23, x22, #1
; CHECK-NEXT: b .LBB4_6
; CHECK-NEXT: .LBB4_4: // %if.else
@@ -159,16 +159,14 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
; CHECK-NEXT: .LBB4_5: // %LJ.latch
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
-; CHECK-NEXT: add x8, x21, #1
+; CHECK-NEXT: cmp x21, x19
; CHECK-NEXT: str w0, [x20, x21, lsl #2]
-; CHECK-NEXT: sub x9, x8, #1
-; CHECK-NEXT: cmp x9, x19
+; CHECK-NEXT: add x21, x21, #1
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_6: // %LJ
; CHECK-NEXT: // Parent Loop BB4_3 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
-; CHECK-NEXT: mov x21, x8
-; CHECK-NEXT: ldr w8, [x20, x8, lsl #2]
+; CHECK-NEXT: ldr w8, [x20, x21, lsl #2]
; CHECK-NEXT: tbz w8, #31, .LBB4_4
; CHECK-NEXT: // %bb.7: // %if.then
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
index 8e8934b6e9599..da72ddd81350d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
@@ -96,17 +96,18 @@ exit:
define void @cbz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB3_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
-; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr.w r4, [r0, r3, lsl #2]
+; CHECK-NEXT: mov r2, r3
+; CHECK-NEXT: adds r3, #1
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
@@ -126,17 +127,18 @@ exit:
define void @cbnz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbnz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB4_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
-; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr.w r4, [r0, r3, lsl #2]
+; CHECK-NEXT: mov r2, r3
+; CHECK-NEXT: adds r3, #1
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: beq .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index 6943622fac7f2..6df19767b2bb4 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -1308,28 +1308,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-LINUX-NEXT: .LBB13_1: # %inner_loop
; SSE-LINUX-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-LINUX-NEXT: movq %rcx, %r8
-; SSE-LINUX-NEXT: shrq $6, %r8
-; SSE-LINUX-NEXT: movq (%rsi,%r8,8), %r8
-; SSE-LINUX-NEXT: btq %rcx, %r8
-; SSE-LINUX-NEXT: leaq 1(%rcx), %rcx
+; SSE-LINUX-NEXT: shrq $6, %rcx
+; SSE-LINUX-NEXT: movq (%rsi,%rcx,8), %r9
+; SSE-LINUX-NEXT: leaq 1(%r8), %rcx
+; SSE-LINUX-NEXT: btq %r8, %r9
; SSE-LINUX-NEXT: jae .LBB13_1
; SSE-LINUX-NEXT: # %bb.2: # %loop_end
; SSE-LINUX-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-LINUX-NEXT: leaq 1(%rax), %r8
+; SSE-LINUX-NEXT: leaq 1(%rax), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
; SSE-LINUX-NEXT: movapd %xmm0, %xmm5
; SSE-LINUX-NEXT: subsd %xmm4, %xmm5
; SSE-LINUX-NEXT: mulsd %xmm1, %xmm5
-; SSE-LINUX-NEXT: leaq -1(%rcx), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
; SSE-LINUX-NEXT: mulsd %xmm2, %xmm4
; SSE-LINUX-NEXT: addsd %xmm5, %xmm4
; SSE-LINUX-NEXT: divsd %xmm3, %xmm4
; SSE-LINUX-NEXT: movsd %xmm4, -8(%rdi,%rax,8)
-; SSE-LINUX-NEXT: movq %r8, %rax
-; SSE-LINUX-NEXT: cmpq %r8, %rdx
+; SSE-LINUX-NEXT: movq %r9, %rax
+; SSE-LINUX-NEXT: cmpq %r9, %rdx
; SSE-LINUX-NEXT: jge .LBB13_1
; SSE-LINUX-NEXT: # %bb.3: # %loopdone
; SSE-LINUX-NEXT: retq
@@ -1380,28 +1379,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-WIN-NEXT: .LBB13_1: # %inner_loop
; SSE-WIN-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-WIN-NEXT: movq %r9, %r10
-; SSE-WIN-NEXT: shrq $6, %r10
-; SSE-WIN-NEXT: movq (%rdx,%r10,8), %r10
-; SSE-WIN-NEXT: btq %r9, %r10
-; SSE-WIN-NEXT: leaq 1(%r9), %r9
+; SSE-WIN-NEXT: shrq $6, %r9
+; SSE-WIN-NEXT: movq (%rdx,%r9,8), %r11
+; SSE-WIN-NEXT: leaq 1(%r10), %r9
+; SSE-WIN-NEXT: btq %r10, %r11
; SSE-WIN-NEXT: jae .LBB13_1
; SSE-WIN-NEXT: # %bb.2: # %loop_end
; SSE-WIN-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-WIN-NEXT: leaq 1(%r8), %r10
+; SSE-WIN-NEXT: leaq 1(%r8), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
; SSE-WIN-NEXT: movapd %xmm2, %xmm5
; SSE-WIN-NEXT: subsd %xmm4, %xmm5
; SSE-WIN-NEXT: mulsd %xmm3, %xmm5
-; SSE-WIN-NEXT: leaq -1(%r9), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
; SSE-WIN-NEXT: mulsd %xmm1, %xmm4
; SSE-WIN-NEXT: addsd %xmm5, %xmm4
; SSE-WIN-NEXT: divsd %xmm0, %xmm4
; SSE-WIN-NEXT: movsd %xmm4, -8(%rcx,%r8,8)
-; SSE-WIN-NEXT: movq %r10, %r8
-; SSE-WIN-NEXT: cmpq %r10, %rax
+; SSE-WIN-NEXT: movq %r11, %r8
+; SSE-WIN-NEXT: cmpq %r11, %rax
; SSE-WIN-NEXT: jge .LBB13_1
; SSE-WIN-NEXT: # %bb.3: # %loopdone
; SSE-WIN-NEXT: movaps (%rsp), %xmm7 # 16-byte Reload
@@ -1465,25 +1463,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX1-NEXT: .LBB13_1: # %inner_loop
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
; AVX1-NEXT: movq %r9, %r10
-; AVX1-NEXT: shrq $6, %r10
-; AVX1-NEXT: movq (%rdx,%r10,8), %r10
-; AVX1-NEXT: btq %r9, %r10
-; AVX1-NEXT: leaq 1(%r9), %r9
+; AVX1-NEXT: shrq $6, %r9
+; AVX1-NEXT: movq (%rdx,%r9,8), %r11
+; AVX1-NEXT: leaq 1(%r10), %r9
+; AVX1-NEXT: btq %r10, %r11
; AVX1-NEXT: jae .LBB13_1
; AVX1-NEXT: # %bb.2: # %loop_end
; AVX1-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX1-NEXT: leaq 1(%r8), %r10
-; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX1-NEXT: leaq 1(%r8), %r11
+; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX1-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX1-NEXT: leaq -1(%r9), %r11
-; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX1-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX1-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX1-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX1-NEXT: movq %r10, %r8
-; AVX1-NEXT: cmpq %r10, %rax
+; AVX1-NEXT: movq %r11, %r8
+; AVX1-NEXT: cmpq %r11, %rax
; AVX1-NEXT: jge .LBB13_1
; AVX1-NEXT: # %bb.3: # %loopdone
; AVX1-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
@@ -1547,25 +1544,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX512VL-NEXT: .LBB13_1: # %inner_loop
; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512VL-NEXT: movq %r9, %r10
-; AVX512VL-NEXT: shrq $6, %r10
-; AVX512VL-NEXT: movq (%rdx,%r10,8), %r10
-; AVX512VL-NEXT: btq %r9, %r10
-; AVX512VL-NEXT: leaq 1(%r9), %r9
+; AVX512VL-NEXT: shrq $6, %r9
+; AVX512VL-NEXT: movq (%rdx,%r9,8), %r11
+; AVX512VL-NEXT: leaq 1(%r10), %r9
+; AVX512VL-NEXT: btq %r10, %r11
; AVX512VL-NEXT: jae .LBB13_1
; AVX512VL-NEXT: # %bb.2: # %loop_end
; AVX512VL-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX512VL-NEXT: leaq 1(%r8), %r10
-; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX512VL-NEXT: leaq 1(%r8), %r11
+; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX512VL-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX512VL-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX512VL-NEXT: leaq -1(%r9), %r11
-; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX512VL-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX512VL-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX512VL-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX512VL-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX512VL-NEXT: movq %r10, %r8
-; AVX512VL-NEXT: cmpq %r10, %rax
+; AVX512VL-NEXT: movq %r11, %r8
+; AVX512VL-NEXT: cmpq %r11, %rax
; AVX512VL-NEXT: jge .LBB13_1
; AVX512VL-NEXT: # %bb.3: # %loopdone
; AVX512VL-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 097575ca86bcc..94d6b180c061a 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -13,9 +13,10 @@ define void @test1(ptr %s, i32 %n) {
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
@@ -52,9 +53,10 @@ define void @test2(ptr %struct, i32 %n) {
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
@@ -93,9 +95,10 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) {
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #3140]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #3136]
+; CHECK-NEXT: str w10, [x8, #3140]
; CHECK-NEXT: cmp w9, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
@@ -146,10 +149,11 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: mov w19, w0
-; CHECK-NEXT: mov w20, wzr
+; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: .LBB3_1: // %while_cond
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: mov w20, w9
; CHECK-NEXT: bl foo
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: // %bb.2: // %while_cond_x.split
@@ -160,9 +164,9 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler {
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: add w9, w20, #1
; CHECK-NEXT: str w20, [x8, #3140]
-; CHECK-NEXT: add w20, w20, #1
-; CHECK-NEXT: str w20, [x8, #3136]
+; CHECK-NEXT: str w9, [x8, #3136]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
@@ -221,9 +225,10 @@ define void @test5(ptr %s, i32 %n) {
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str w9, [x8, #2180]
+; CHECK-NEXT: mov w10, w9
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: str w9, [x8, #2176]
+; CHECK-NEXT: str w10, [x8, #2180]
; CHECK-NEXT: cmp w9, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index 7ae78ae6a1fd4..6a70fe48de7e2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -20,17 +20,16 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: leaq 1(%rcx), %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.3: # %bb8split
-; CHECK-NEXT: decq %rbx
-; CHECK-NEXT: .LBB0_4: # %bb8
-; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: .LBB0_3: # %bb8
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
index 89523b4487d49..b59ca63abfa41 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
@@ -19,8 +19,7 @@ define i8 @drop_nuw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -49,8 +48,7 @@ define i8 @drop_nsw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 127
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], 1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -79,8 +77,7 @@ define i8 @already_postinc() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
index 9c3698a740992..a46d75ae537ab 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
@@ -22,10 +22,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; DEFAULT: preheader:
; DEFAULT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; DEFAULT-NEXT: br label [[INNER_LOOP:%.*]]
@@ -87,10 +86,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; LIMIT: preheader:
; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; LIMIT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; LIMIT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; LIMIT-NEXT: br label [[INNER_LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
index 74d861cf5168c..d66905ae26f14 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
@@ -20,8 +20,7 @@ define i32 @test(i1 %c.1, ptr %src) {
; CHECK-NEXT: [[OR:%.*]] = or i1 [[P]], [[T]]
; CHECK-NEXT: [[ZEXT_OR:%.*]] = zext i1 [[OR]] to i32
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw i32 [[LSR_IV]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV_NEXT]], -1
-; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[TMP0]], -1050
+; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[LSR_IV]], -1050
; CHECK-NEXT: br i1 [[LOOP_HEADER_TERMCOND]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[ZEXT_OR_LCSSA:%.*]] = phi i32 [ [[ZEXT_OR]], [[LOOP_LATCH]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
index 502042eaf9b9c..007d84f9120e3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
@@ -16,9 +16,7 @@ define void @test1() {
; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef
; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
-; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7
-; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[VAL]], 7
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3
@@ -58,10 +56,8 @@ define void @test1() {
; CHECK: bb20.bb15splitsplitsplit_crit_edge:
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]...
[truncated]
|
if (Flags & SCEV::FlagNUW) | ||
BOI->setHasNoUnsignedWrap(); | ||
if (Flags & SCEV::FlagNSW) | ||
BOI->setHasNoSignedWrap(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
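You can't set flags after the fact with InstSimplifyFolder: the folder may simplify the operation to an existing instruction instead of creating a new one, in which case the `dyn_cast<Instruction>` still succeeds and the nuw/nsw flags (and the debug location) get applied to an instruction the expander did not create. You need to add a new IRBuilder method like CreateNoWrapBinOp that accepts the flags as arguments.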
@@ -19,8 +19,7 @@ define i8 @drop_nuw() { | |||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0 | |||
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] | |||
; CHECK: exit: | |||
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1 | |||
; CHECK-NEXT: ret i8 [[TMP0]] | |||
; CHECK-NEXT: ret i8 [[IV]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without looking into it too deeply, changes like this might be problematic for LSR: the live-out is now going to be the pre-increment IV rather than the post-increment one. (LSR should probably be wrapping things into SCEVUnknown in cases where it cares.)
Removes the shift pair from add1_3 in #146241.
I think there may be some regressions in here.