@@ -4677,37 +4677,33 @@ define <2 x bfloat> @v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64(<2 x bfloat> %m
4677
4677
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4678
4678
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
4679
4679
; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
4680
- ; GCN-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
4681
- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, v[4:5]
4680
+ ; GCN-NEXT: v_and_b32_e32 v2, 0x80000000, v5
4681
+ ; GCN-NEXT: v_and_b32_e32 v3, 0x80000000, v3
4682
4682
; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4683
- ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
4684
4683
; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
4684
+ ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
4685
4685
; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
4686
- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
4687
- ; GCN-NEXT: v_and_b32_e32 v2, 0x8000, v2
4688
- ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
4689
- ; GCN-NEXT: v_or_b32_e32 v0, v0, v2
4690
- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4686
+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v2
4687
+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v3
4691
4688
; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4689
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4692
4690
; GCN-NEXT: s_setpc_b64 s[30:31]
4693
4691
;
4694
4692
; GFX7-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
4695
4693
; GFX7: ; %bb.0:
4696
4694
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4697
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
4698
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v3, v[4:5]
4699
- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
4700
4695
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
4696
+ ; GFX7-NEXT: v_and_b32_e32 v2, 0x80000000, v5
4701
4697
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4702
- ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
4703
- ; GFX7-NEXT: v_and_b32_e32 v3, 0x8000, v3
4704
4698
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
4705
- ; GFX7-NEXT: v_and_b32_e32 v2, 0x8000, v2
4699
+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
4700
+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
4701
+ ; GFX7-NEXT: v_and_b32_e32 v2, 0x80000000, v3
4702
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
4706
4703
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
4707
- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
4708
4704
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
4709
- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4710
4705
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4706
+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4711
4707
; GFX7-NEXT: s_setpc_b64 s[30:31]
4712
4708
;
4713
4709
; GFX8-LABEL: v_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
@@ -5585,35 +5581,31 @@ define amdgpu_ps i32 @s_copysign_out_v2bf16_mag_v2bf16_sign_v2f64(<2 x bfloat> i
5585
5581
; GCN: ; %bb.0:
5586
5582
; GCN-NEXT: v_mul_f32_e64 v0, 1.0, s1
5587
5583
; GCN-NEXT: v_mul_f32_e64 v1, 1.0, s0
5588
- ; GCN-NEXT: v_cvt_f32_f64_e32 v2, s[4:5]
5589
- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, s[2:3]
5590
- ; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v2
5591
- ; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
5584
+ ; GCN-NEXT: s_and_b32 s0, s3, 0x80000000
5585
+ ; GCN-NEXT: s_and_b32 s1, s5, 0x80000000
5586
+ ; GCN-NEXT: s_lshr_b32 s0, s0, 16
5592
5587
; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
5588
+ ; GCN-NEXT: s_lshr_b32 s1, s1, 16
5593
5589
; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
5594
- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
5595
- ; GCN-NEXT: v_and_b32_e32 v2, 0x8000, v2
5596
- ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
5597
- ; GCN-NEXT: v_or_b32_e32 v0, v0, v2
5590
+ ; GCN-NEXT: v_or_b32_e32 v1, s0, v1
5591
+ ; GCN-NEXT: v_or_b32_e32 v0, s1, v0
5598
5592
; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5599
5593
; GCN-NEXT: v_or_b32_e32 v0, v1, v0
5600
5594
; GCN-NEXT: v_readfirstlane_b32 s0, v0
5601
5595
; GCN-NEXT: ; return to shader part epilog
5602
5596
;
5603
5597
; GFX7-LABEL: s_copysign_out_v2bf16_mag_v2bf16_sign_v2f64:
5604
5598
; GFX7: ; %bb.0:
5605
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
5606
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
5607
- ; GFX7-NEXT: v_mul_f32_e64 v2, 1.0, s1
5608
- ; GFX7-NEXT: v_mul_f32_e64 v3, 1.0, s0
5609
- ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
5610
- ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
5611
- ; GFX7-NEXT: v_and_b32_e32 v0, 0x8000, v0
5612
- ; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
5613
- ; GFX7-NEXT: v_and_b32_e32 v1, 0x8000, v1
5614
- ; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 15
5615
- ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
5616
- ; GFX7-NEXT: v_or_b32_e32 v1, v3, v1
5599
+ ; GFX7-NEXT: v_mul_f32_e64 v1, 1.0, s0
5600
+ ; GFX7-NEXT: s_and_b32 s0, s3, 0x80000000
5601
+ ; GFX7-NEXT: s_lshr_b32 s0, s0, 16
5602
+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
5603
+ ; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s1
5604
+ ; GFX7-NEXT: v_or_b32_e32 v1, s0, v1
5605
+ ; GFX7-NEXT: s_and_b32 s0, s5, 0x80000000
5606
+ ; GFX7-NEXT: s_lshr_b32 s0, s0, 16
5607
+ ; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
5608
+ ; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
5617
5609
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5618
5610
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
5619
5611
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -6682,51 +6674,45 @@ define <3 x bfloat> @v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64(<3 x bfloat> %m
6682
6674
; GCN: ; %bb.0:
6683
6675
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6684
6676
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
6685
- ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
6686
6677
; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
6687
- ; GCN-NEXT: v_cvt_f32_f64_e32 v3, v[3:4]
6688
- ; GCN-NEXT: v_cvt_f32_f64_e32 v4, v[5:6]
6689
- ; GCN-NEXT: v_cvt_f32_f64_e32 v5, v[7:8]
6678
+ ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
6679
+ ; GCN-NEXT: v_and_b32_e32 v3, 0x80000000, v6
6680
+ ; GCN-NEXT: v_and_b32_e32 v5, 0x80000000, v8
6681
+ ; GCN-NEXT: v_and_b32_e32 v4, 0x80000000, v4
6690
6682
; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6691
- ; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
6683
+ ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
6692
6684
; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
6693
6685
; GCN-NEXT: v_bfe_u32 v2, v2, 16, 15
6694
- ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
6686
+ ; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
6695
6687
; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
6696
- ; GCN-NEXT: v_and_b32_e32 v5, 0x8000, v5
6697
- ; GCN-NEXT: v_and_b32_e32 v4, 0x8000, v4
6698
- ; GCN-NEXT: v_and_b32_e32 v3, 0x8000, v3
6688
+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
6699
6689
; GCN-NEXT: v_or_b32_e32 v2, v2, v5
6700
- ; GCN-NEXT: v_or_b32_e32 v1, v1, v4
6701
- ; GCN-NEXT: v_or_b32_e32 v0, v0, v3
6702
- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6690
+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v4
6703
6691
; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6692
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6704
6693
; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
6705
6694
; GCN-NEXT: s_setpc_b64 s[30:31]
6706
6695
;
6707
6696
; GFX7-LABEL: v_copysign_out_v3bf16_mag_v3bf16_sign_v3f64:
6708
6697
; GFX7: ; %bb.0:
6709
6698
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6710
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v3, v[3:4]
6711
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v4, v[5:6]
6712
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v5, v[7:8]
6713
- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
6714
6699
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
6700
+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v6
6701
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6702
+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
6715
6703
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
6704
+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
6705
+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v8
6716
6706
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6717
- ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
6718
- ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
6719
- ; GFX7-NEXT: v_and_b32_e32 v5, 0x8000, v5
6720
6707
; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
6721
- ; GFX7-NEXT: v_and_b32_e32 v4, 0x8000, v4
6722
- ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
6723
- ; GFX7-NEXT: v_and_b32_e32 v3, 0x8000, v3
6708
+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
6709
+ ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
6710
+ ; GFX7-NEXT: v_and_b32_e32 v3, 0x80000000, v4
6711
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
6724
6712
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
6725
- ; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
6726
- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
6727
6713
; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
6728
- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6729
6714
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6715
+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6730
6716
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
6731
6717
; GFX7-NEXT: s_setpc_b64 s[30:31]
6732
6718
;
@@ -8082,66 +8068,58 @@ define <4 x bfloat> @v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64(<4 x bfloat> %m
8082
8068
; GCN: ; %bb.0:
8083
8069
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8084
8070
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
8085
- ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
8086
8071
; GCN-NEXT: v_mul_f32_e32 v2, 1.0, v2
8087
8072
; GCN-NEXT: v_mul_f32_e32 v3, 1.0, v3
8088
- ; GCN-NEXT: v_cvt_f32_f64_e32 v4, v[4:5]
8089
- ; GCN-NEXT: v_cvt_f32_f64_e32 v5, v[6:7]
8090
- ; GCN-NEXT: v_cvt_f32_f64_e32 v6, v[8:9]
8091
- ; GCN-NEXT: v_cvt_f32_f64_e32 v7, v[10:11]
8073
+ ; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
8074
+ ; GCN-NEXT: v_and_b32_e32 v4, 0x80000000, v7
8075
+ ; GCN-NEXT: v_and_b32_e32 v6, 0x80000000, v11
8076
+ ; GCN-NEXT: v_and_b32_e32 v7, 0x80000000, v9
8077
+ ; GCN-NEXT: v_and_b32_e32 v5, 0x80000000, v5
8092
8078
; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8093
- ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8079
+ ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
8094
8080
; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v6
8095
- ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7
8096
8081
; GCN-NEXT: v_bfe_u32 v3, v3, 16, 15
8082
+ ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7
8097
8083
; GCN-NEXT: v_bfe_u32 v2, v2, 16, 15
8098
- ; GCN-NEXT: v_bfe_u32 v1, v1, 16, 15
8084
+ ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8099
8085
; GCN-NEXT: v_bfe_u32 v0, v0, 16, 15
8100
- ; GCN-NEXT: v_and_b32_e32 v7, 0x8000, v7
8101
- ; GCN-NEXT: v_and_b32_e32 v6, 0x8000, v6
8102
- ; GCN-NEXT: v_and_b32_e32 v5, 0x8000, v5
8103
- ; GCN-NEXT: v_and_b32_e32 v4, 0x8000, v4
8104
- ; GCN-NEXT: v_or_b32_e32 v3, v3, v7
8105
- ; GCN-NEXT: v_or_b32_e32 v2, v2, v6
8106
- ; GCN-NEXT: v_or_b32_e32 v1, v1, v5
8107
- ; GCN-NEXT: v_or_b32_e32 v0, v0, v4
8108
- ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8086
+ ; GCN-NEXT: v_or_b32_e32 v1, v1, v4
8087
+ ; GCN-NEXT: v_or_b32_e32 v3, v3, v6
8088
+ ; GCN-NEXT: v_or_b32_e32 v2, v2, v7
8089
+ ; GCN-NEXT: v_or_b32_e32 v0, v0, v5
8109
8090
; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8110
- ; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
8111
8091
; GCN-NEXT: v_lshlrev_b32_e32 v3, 16, v3
8092
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8093
+ ; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2
8112
8094
; GCN-NEXT: s_setpc_b64 s[30:31]
8113
8095
;
8114
8096
; GFX7-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64:
8115
8097
; GFX7: ; %bb.0:
8116
8098
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8117
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v4, v[4:5]
8118
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v5, v[6:7]
8119
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v6, v[8:9]
8120
- ; GFX7-NEXT: v_cvt_f32_f64_e32 v7, v[10:11]
8121
- ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
8122
8099
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
8123
- ; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
8100
+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v7
8101
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8102
+ ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
8124
8103
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
8104
+ ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
8105
+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v11
8125
8106
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8126
- ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
8127
- ; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v6
8128
- ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v7
8129
- ; GFX7-NEXT: v_and_b32_e32 v7, 0x8000, v7
8130
8107
; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 15
8131
- ; GFX7-NEXT: v_and_b32_e32 v6, 0x8000, v6
8108
+ ; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
8109
+ ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4
8110
+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v9
8111
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8132
8112
; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 15
8133
- ; GFX7-NEXT: v_and_b32_e32 v5, 0x8000, v5
8134
- ; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 15
8135
- ; GFX7-NEXT: v_and_b32_e32 v4, 0x8000, v4
8113
+ ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
8114
+ ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4
8115
+ ; GFX7-NEXT: v_and_b32_e32 v4, 0x80000000, v5
8116
+ ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
8136
8117
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 15
8137
- ; GFX7-NEXT: v_or_b32_e32 v3, v3, v7
8138
- ; GFX7-NEXT: v_or_b32_e32 v2, v2, v6
8139
- ; GFX7-NEXT: v_or_b32_e32 v1, v1, v5
8140
8118
; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
8141
- ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8142
8119
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
8143
- ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
8144
8120
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
8121
+ ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
8122
+ ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
8145
8123
; GFX7-NEXT: s_setpc_b64 s[30:31]
8146
8124
;
8147
8125
; GFX8-LABEL: v_copysign_out_v4bf16_mag_v4bf16_sign_v4f64:
0 commit comments