20
20
21
21
#include "libavutil/aarch64/asm.S"
22
22
23
- .macro lumConvertRange name , max , mult , offset , shift
23
+ .macro lumConvertRange name , fromto , mult , offset , shift
24
24
function ff_\name , export= 1
25
- .if \max != 0
26
- mov w3 , #\max
27
- dup v24. 8h , w3
28
- .endif
29
25
mov w3 , #\mult
30
26
dup v25.4s , w3
31
27
movz w3 , #(\offset & 0xffff )
32
28
movk w3 , #((\offset >> 16 ) & 0xffff ) , lsl # 16
33
29
dup v26.4s , w3
34
30
1 :
35
31
ld1 {v0. 8h } , [ x0 ]
36
- .if \max != 0
37
- smin v0. 8h , v0. 8h , v24. 8h
38
- .endif
39
32
mov v16.16b , v26.16b
40
33
mov v18.16b , v26.16b
41
34
sxtl v20.4s , v0. 4h
42
35
sxtl2 v22.4s , v0. 8h
43
36
mla v16.4s , v20.4s , v25.4s
44
37
mla v18.4s , v22.4s , v25.4s
38
+ .ifc \fromto , To
39
+ sqshrn v0. 4h , v16.4s , #\shift
40
+ sqshrn2 v0. 8h , v18.4s , #\shift
41
+ .else
45
42
shrn v0. 4h , v16.4s , #\shift
46
43
shrn2 v0. 8h , v18.4s , #\shift
44
+ .endif
47
45
subs w1 , w1 , # 8
48
46
st1 {v0. 8h } , [ x0 ], # 16
49
47
b.gt 1b
50
48
ret
51
49
endfunc
52
50
.endm
53
51
54
- .macro chrConvertRange name , max , mult , offset , shift
52
+ .macro chrConvertRange name , fromto , mult , offset , shift
55
53
function ff_\name , export= 1
56
- .if \max != 0
57
- mov w3 , #\max
58
- dup v24. 8h , w3
59
- .endif
60
54
mov w3 , #\mult
61
55
dup v25.4s , w3
62
56
movz w3 , #(\offset & 0xffff )
@@ -65,10 +59,6 @@ function ff_\name, export=1
65
59
1 :
66
60
ld1 {v0. 8h } , [ x0 ]
67
61
ld1 {v1. 8h } , [ x1 ]
68
- .if \max != 0
69
- smin v0. 8h , v0. 8h , v24. 8h
70
- smin v1. 8h , v1. 8h , v24. 8h
71
- .endif
72
62
mov v16.16b , v26.16b
73
63
mov v17.16b , v26.16b
74
64
mov v18.16b , v26.16b
@@ -81,10 +71,17 @@ function ff_\name, export=1
81
71
mla v17.4s , v21.4s , v25.4s
82
72
mla v18.4s , v22.4s , v25.4s
83
73
mla v19.4s , v23.4s , v25.4s
74
+ .ifc \fromto , To
75
+ sqshrn v0. 4h , v16.4s , #\shift
76
+ sqshrn v1. 4h , v17.4s , #\shift
77
+ sqshrn2 v0. 8h , v18.4s , #\shift
78
+ sqshrn2 v1. 8h , v19.4s , #\shift
79
+ .else
84
80
shrn v0. 4h , v16.4s , #\shift
85
81
shrn v1. 4h , v17.4s , #\shift
86
82
shrn2 v0. 8h , v18.4s , #\shift
87
83
shrn2 v1. 8h , v19.4s , #\shift
84
+ .endif
88
85
subs w2 , w2 , # 8
89
86
st1 {v0. 8h } , [ x0 ], # 16
90
87
st1 {v1. 8h } , [ x1 ], # 16
@@ -93,7 +90,7 @@ function ff_\name, export=1
93
90
endfunc
94
91
.endm
95
92
96
- lumConvertRange lumRangeToJpeg_neon , 30189 , 19077 , - 39057361 , 14
97
- chrConvertRange chrRangeToJpeg_neon , 30775 , 4663 , - 9289992 , 12
98
- lumConvertRange lumRangeFromJpeg_neon , 0 , 14071 , 33561947 , 14
99
- chrConvertRange chrRangeFromJpeg_neon , 0 , 1799 , 4081085 , 11
93
+ lumConvertRange lumRangeToJpeg_neon , To , 19077 , - 39057361 , 14
94
+ chrConvertRange chrRangeToJpeg_neon , To , 4663 , - 9289992 , 12
95
+ lumConvertRange lumRangeFromJpeg_neon , From , 14071 , 33561947 , 14
96
+ chrConvertRange chrRangeFromJpeg_neon , From , 1799 , 4081085 , 11
0 commit comments