@@ -1592,3 +1592,91 @@ entry:
1592
1592
%1 = bitcast <8 x i8 > %0 to i64
1593
1593
ret i64 %1
1594
1594
}
1595
+
1596
+ define void @foo (<4 x i64 > %a , <4 x i64 > %b , ptr %p ) "min-legal-vector-width" ="256" "prefer-vector-width" ="256" { ; Concatenates %a and %b, truncates every i64 lane to i32 and stores the eight results to %p.
1597
+ ; SSE-LABEL: foo:
1598
+ ; SSE: # %bb.0: # %entry
1599
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
1600
+ ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
1601
+ ; SSE-NEXT: movaps %xmm2, 16(%rdi)
1602
+ ; SSE-NEXT: movaps %xmm0, (%rdi)
1603
+ ; SSE-NEXT: retq
1604
+ ;
1605
+ ; AVX1-LABEL: foo:
1606
+ ; AVX1: # %bb.0: # %entry
1607
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1608
+ ; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1609
+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1610
+ ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1611
+ ; AVX1-NEXT: vmovaps %xmm1, 16(%rdi)
1612
+ ; AVX1-NEXT: vmovaps %xmm0, (%rdi)
1613
+ ; AVX1-NEXT: vzeroupper
1614
+ ; AVX1-NEXT: retq
1615
+ ;
1616
+ ; AVX2-SLOW-LABEL: foo:
1617
+ ; AVX2-SLOW: # %bb.0: # %entry
1618
+ ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
1619
+ ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1620
+ ; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
1621
+ ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1622
+ ; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi)
1623
+ ; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi)
1624
+ ; AVX2-SLOW-NEXT: vzeroupper
1625
+ ; AVX2-SLOW-NEXT: retq
1626
+ ;
1627
+ ; AVX2-FAST-ALL-LABEL: foo:
1628
+ ; AVX2-FAST-ALL: # %bb.0: # %entry
1629
+ ; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
1630
+ ; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
1631
+ ; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
1632
+ ; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi)
1633
+ ; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi)
1634
+ ; AVX2-FAST-ALL-NEXT: vzeroupper
1635
+ ; AVX2-FAST-ALL-NEXT: retq
1636
+ ;
1637
+ ; AVX2-FAST-PERLANE-LABEL: foo:
1638
+ ; AVX2-FAST-PERLANE: # %bb.0: # %entry
1639
+ ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2
1640
+ ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
1641
+ ; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
1642
+ ; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
1643
+ ; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi)
1644
+ ; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi)
1645
+ ; AVX2-FAST-PERLANE-NEXT: vzeroupper
1646
+ ; AVX2-FAST-PERLANE-NEXT: retq
1647
+ ;
1648
+ ; AVX512F-LABEL: foo:
1649
+ ; AVX512F: # %bb.0: # %entry
1650
+ ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1651
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1652
+ ; AVX512F-NEXT: vpmovqd %zmm0, (%rdi)
1653
+ ; AVX512F-NEXT: vzeroupper
1654
+ ; AVX512F-NEXT: retq
1655
+ ;
1656
+ ; AVX512VL-LABEL: foo:
1657
+ ; AVX512VL: # %bb.0: # %entry
1658
+ ; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi)
1659
+ ; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi)
1660
+ ; AVX512VL-NEXT: vzeroupper
1661
+ ; AVX512VL-NEXT: retq
1662
+ ;
1663
+ ; AVX512BW-LABEL: foo:
1664
+ ; AVX512BW: # %bb.0: # %entry
1665
+ ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1666
+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
1667
+ ; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi)
1668
+ ; AVX512BW-NEXT: vzeroupper
1669
+ ; AVX512BW-NEXT: retq
1670
+ ;
1671
+ ; AVX512BWVL-LABEL: foo:
1672
+ ; AVX512BWVL: # %bb.0: # %entry
1673
+ ; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi)
1674
+ ; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi)
1675
+ ; AVX512BWVL-NEXT: vzeroupper
1676
+ ; AVX512BWVL-NEXT: retq
1677
+ entry: ; NOTE(review): both vector-width attributes are 256, yet the two non-VL AVX-512 expectations above widen into zmm0 before a single vpmovqd, while the VL ones use two ymm vpmovqd stores - confirm this is the intended baseline.
1678
+ %0 = shufflevector <4 x i64 > %a , <4 x i64 > %b , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 > ; identity mask 0..7 takes all lanes of %a followed by all lanes of %b, i.e. a plain concatenation into one <8 x i64>
1679
+ %1 = trunc nsw <8 x i64 > %0 to <8 x i32 > ; nsw flags the truncation as not changing the signed value of any lane
1680
+ store <8 x i32 > %1 , ptr %p , align 16 ; the 32-byte result is stored with only 16-byte alignment; the SSE/AVX expectations above emit it as two aligned 16-byte stores
1681
+ ret void
1682
+ }
0 commit comments