Skip to content

Commit 9b1bf4c

Browse files
phoebewang authored and dianqk committed
[X86] Ignore NSW when DstSVT is i32 (llvm#131755)
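X86 has no PACKSS instruction for i64->i32 truncation, so the NSW flag alone must not select PACKSS when the destination element type is i32. Fixes the issue shown at https://godbolt.org/z/qb8nxnPbK, which was introduced by ddd2f57. (cherry picked from commit 3d63191)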
1 parent 7341322 commit 9b1bf4c

File tree

2 files changed

+90
-1
lines changed

2 files changed

+90
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20889,7 +20889,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2088920889
return SDValue();
2089020890

2089120891
unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20892-
if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) {
20892+
if ((Flags.hasNoSignedWrap() && DstSVT != MVT::i32) ||
20893+
MinSignBits < NumSignBits) {
2089320894
PackOpcode = X86ISD::PACKSS;
2089420895
return In;
2089520896
}

llvm/test/CodeGen/X86/vector-trunc-nowrap.ll

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1592,3 +1592,91 @@ entry:
15921592
%1 = bitcast <8 x i8> %0 to i64
15931593
ret i64 %1
15941594
}
1595+
1596+
define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
1597+
; SSE-LABEL: foo:
1598+
; SSE: # %bb.0: # %entry
1599+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
1600+
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
1601+
; SSE-NEXT: movaps %xmm2, 16(%rdi)
1602+
; SSE-NEXT: movaps %xmm0, (%rdi)
1603+
; SSE-NEXT: retq
1604+
;
1605+
; AVX1-LABEL: foo:
1606+
; AVX1: # %bb.0: # %entry
1607+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1608+
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1609+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1610+
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1611+
; AVX1-NEXT: vmovaps %xmm1, 16(%rdi)
1612+
; AVX1-NEXT: vmovaps %xmm0, (%rdi)
1613+
; AVX1-NEXT: vzeroupper
1614+
; AVX1-NEXT: retq
1615+
;
1616+
; AVX2-SLOW-LABEL: foo:
1617+
; AVX2-SLOW: # %bb.0: # %entry
1618+
; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
1619+
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1620+
; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
1621+
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1622+
; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi)
1623+
; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi)
1624+
; AVX2-SLOW-NEXT: vzeroupper
1625+
; AVX2-SLOW-NEXT: retq
1626+
;
1627+
; AVX2-FAST-ALL-LABEL: foo:
1628+
; AVX2-FAST-ALL: # %bb.0: # %entry
1629+
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1630+
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
1631+
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
1632+
; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi)
1633+
; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi)
1634+
; AVX2-FAST-ALL-NEXT: vzeroupper
1635+
; AVX2-FAST-ALL-NEXT: retq
1636+
;
1637+
; AVX2-FAST-PERLANE-LABEL: foo:
1638+
; AVX2-FAST-PERLANE: # %bb.0: # %entry
1639+
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2
1640+
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1641+
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
1642+
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1643+
; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi)
1644+
; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi)
1645+
; AVX2-FAST-PERLANE-NEXT: vzeroupper
1646+
; AVX2-FAST-PERLANE-NEXT: retq
1647+
;
1648+
; AVX512F-LABEL: foo:
1649+
; AVX512F: # %bb.0: # %entry
1650+
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1651+
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1652+
; AVX512F-NEXT: vpmovqd %zmm0, (%rdi)
1653+
; AVX512F-NEXT: vzeroupper
1654+
; AVX512F-NEXT: retq
1655+
;
1656+
; AVX512VL-LABEL: foo:
1657+
; AVX512VL: # %bb.0: # %entry
1658+
; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi)
1659+
; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi)
1660+
; AVX512VL-NEXT: vzeroupper
1661+
; AVX512VL-NEXT: retq
1662+
;
1663+
; AVX512BW-LABEL: foo:
1664+
; AVX512BW: # %bb.0: # %entry
1665+
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1666+
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1667+
; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi)
1668+
; AVX512BW-NEXT: vzeroupper
1669+
; AVX512BW-NEXT: retq
1670+
;
1671+
; AVX512BWVL-LABEL: foo:
1672+
; AVX512BWVL: # %bb.0: # %entry
1673+
; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi)
1674+
; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi)
1675+
; AVX512BWVL-NEXT: vzeroupper
1676+
; AVX512BWVL-NEXT: retq
1677+
entry:
1678+
%0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1679+
%1 = trunc nsw <8 x i64> %0 to <8 x i32>
1680+
store <8 x i32> %1, ptr %p, align 16
1681+
ret void
1682+
}

0 commit comments

Comments
 (0)