Skip to content

Commit ac0f5f4

Browse files
committed
avfilter/vf_maskedclamp: add x86 SIMD
1 parent 11cfff0 commit ac0f5f4

File tree

5 files changed

+190
-7
lines changed

5 files changed

+190
-7
lines changed

libavfilter/maskedclamp.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright (c) 2019 Paul B Mahol
3+
*
4+
* This file is part of FFmpeg.
5+
*
6+
* FFmpeg is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 2.1 of the License, or (at your option) any later version.
10+
*
11+
* FFmpeg is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with FFmpeg; if not, write to the Free Software
18+
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19+
*/
20+
21+
#ifndef AVFILTER_MASKEDCLAMP_H
22+
#define AVFILTER_MASKEDCLAMP_H
23+
24+
#include <stddef.h>
25+
#include <stdint.h>
26+
27+
/**
 * Function table holding the per-row kernel of the maskedclamp filter.
 *
 * vf_maskedclamp.c first stores a C implementation in it and then, on x86,
 * passes it to ff_maskedclamp_init_x86(), which may replace the pointer.
 */
typedef struct MaskedClampDSPContext {
28+
/*
 * Row kernel. In the x86 implementations added by this commit each output
 * sample is bsrc clamped to [darksrc - undershoot, brightsrc + overshoot],
 * with both bounds computed using unsigned saturating arithmetic.
 * w is a sample count (the 16-bit kernel doubles it to get a byte count).
 */
void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
29+
const uint8_t *darksrc, const uint8_t *brightsrc,
30+
int w, int undershoot, int overshoot);
31+
} MaskedClampDSPContext;
32+
33+
/**
 * Install x86 SIMD row kernels into dsp where available.
 *
 * @param dsp   function table to update; left untouched when no matching
 *              SIMD kernel is usable
 * @param depth bits per component; depth <= 8 selects the 8-bit kernel,
 *              depth > 8 selects the 16-bit kernel
 */
void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth);
34+
35+
#endif /* AVFILTER_MASKEDCLAMP_H */

libavfilter/vf_maskedclamp.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "internal.h"
2727
#include "video.h"
2828
#include "framesync.h"
29+
#include "maskedclamp.h"
2930

3031
#define OFFSET(x) offsetof(MaskedClampContext, x)
3132
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
@@ -47,9 +48,7 @@ typedef struct MaskedClampContext {
4748
int depth;
4849
FFFrameSync fs;
4950

50-
void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
51-
const uint8_t *darksrc, const uint8_t *brightsrc,
52-
int w, int undershoot, int overshoot);
51+
MaskedClampDSPContext dsp;
5352
} MaskedClampContext;
5453

5554
static const AVOption maskedclamp_options[] = {
@@ -117,7 +116,7 @@ static int maskedclamp_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
117116
}
118117

119118
for (y = slice_start; y < slice_end; y++) {
120-
s->maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
119+
s->dsp.maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
121120

122121
dst += dlinesize;
123122
bsrc += blinesize;
@@ -210,11 +209,16 @@ static int config_input(AVFilterLink *inlink)
210209
s->width[0] = s->width[3] = inlink->w;
211210

212211
s->depth = desc->comp[0].depth;
212+
s->undershoot = FFMIN(s->undershoot, (1 << s->depth) - 1);
213+
s->overshoot = FFMIN(s->overshoot, (1 << s->depth) - 1);
213214

214-
if (desc->comp[0].depth == 8)
215-
s->maskedclamp = maskedclamp8;
215+
if (s->depth <= 8)
216+
s->dsp.maskedclamp = maskedclamp8;
216217
else
217-
s->maskedclamp = maskedclamp16;
218+
s->dsp.maskedclamp = maskedclamp16;
219+
220+
if (ARCH_X86)
221+
ff_maskedclamp_init_x86(&s->dsp, s->depth);
218222

219223
return 0;
220224
}

libavfilter/x86/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
1717
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
1818
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o
1919
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
20+
OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp_init.o
2021
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
2122
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
2223
OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay_init.o
@@ -56,6 +57,7 @@ X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
5657
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
5758
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
5859
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
60+
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp.o
5961
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
6062
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o
6163
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o

libavfilter/x86/vf_maskedclamp.asm

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
;*****************************************************************************
2+
;* x86-optimized functions for maskedclamp filter
3+
;*
4+
;* Copyright (c) 2019 Paul B Mahol
5+
;*
6+
;* This file is part of FFmpeg.
7+
;*
8+
;* FFmpeg is free software; you can redistribute it and/or
9+
;* modify it under the terms of the GNU Lesser General Public
10+
;* License as published by the Free Software Foundation; either
11+
;* version 2.1 of the License, or (at your option) any later version.
12+
;*
13+
;* FFmpeg is distributed in the hope that it will be useful,
14+
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16+
;* Lesser General Public License for more details.
17+
;*
18+
;* You should have received a copy of the GNU Lesser General Public
19+
;* License along with FFmpeg; if not, write to the Free Software
20+
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21+
;******************************************************************************
22+
23+
%include "libavutil/x86/x86util.asm"
24+
25+
SECTION .text
26+
27+
;------------------------------------------------------------------------------
28+
; void ff_maskedclamp{8,16}(const uint8_t *src, uint8_t *dst,
29+
; const uint8_t *darksrc,
30+
; const uint8_t *brightsrc,
31+
; int w, int undershoot, int overshoot)
32+
;------------------------------------------------------------------------------
33+
34+
; maskedclamp8: 8-bit row kernel, 16 bytes (one XMM register) per iteration.
; dst[x] = clip(src[x], dark[x] -sat undershoot, bright[x] +sat overshoot)
; Arguments 0-4 (src, dst, dark, bright, w) are loaded into registers;
; undershoot and overshoot are read via r5m / r6m.
INIT_XMM sse2
35+
cglobal maskedclamp8, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
36+
; sign-extend the int width to pointer size where needed
movsxdifnidn wq, wd
37+
38+
; point every pointer at the end of the row and negate w, so that
; [ptr + wq] walks the row while wq counts up towards zero
add srcq, wq
39+
add darkq, wq
40+
add brightq, wq
41+
add dstq, wq
42+
neg wq
43+
44+
; m3 = undershoot replicated into every byte: the low byte is duplicated
; into a word, then SPLATW (x86util macro, presumably a word broadcast)
; spreads it across the register
movd m3, r5m
45+
punpcklbw m3, m3
46+
SPLATW m3, m3
47+
48+
; m4 = overshoot replicated into every byte, same scheme as m3
movd m4, r6m
49+
punpcklbw m4, m4
50+
SPLATW m4, m4
51+
52+
.loop:
53+
; unaligned loads of the three input rows
movu m0, [srcq + wq]
54+
movu m1, [darkq + wq]
55+
movu m2, [brightq + wq]
56+
57+
; lower bound = dark - undershoot, upper bound = bright + overshoot,
; both with unsigned byte saturation
psubusb m1, m3
58+
paddusb m2, m4
59+
; CLIPUB is an x86util macro; presumably clamps the unsigned bytes of m0
; to [m1, m2] - TODO confirm against x86util.asm
CLIPUB m0, m1, m2
60+
; aligned store - NOTE(review): assumes every dst row start is
; mmsize-aligned; confirm the caller guarantees this
mova [dstq + wq], m0
61+
62+
; advance one full register; when w is not a multiple of mmsize the last
; iteration touches bytes past the row end - NOTE(review): relies on
; buffer padding, confirm
add wq, mmsize
63+
jl .loop
64+
RET
65+
66+
; maskedclamp16: 16-bit row kernel, 8 words (one XMM register) per iteration.
; dst[x] = min(max(src[x], dark[x] -sat undershoot), bright[x] +sat overshoot)
; Declared for sse4 because it uses pmaxuw/pminuw.
INIT_XMM sse4
67+
cglobal maskedclamp16, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
68+
; convert the sample count into a byte count (2 bytes per sample).
; NOTE(review): unlike maskedclamp8 there is no explicit sign extension of
; w before wq is used - presumably relies on w being non-negative; confirm
shl wd, 1
69+
70+
; point every pointer at the end of the row and negate w, so that
; [ptr + wq] walks the row while wq counts up towards zero
add srcq, wq
71+
add darkq, wq
72+
add brightq, wq
73+
add dstq, wq
74+
neg wq
75+
76+
; m3 = undershoot in every word (SPLATW: x86util macro, presumably a
; word broadcast)
movd m3, r5m
77+
SPLATW m3, m3
78+
79+
; m4 = overshoot in every word
movd m4, r6m
80+
SPLATW m4, m4
81+
82+
.loop:
83+
; unaligned loads of the three input rows
movu m0, [srcq + wq]
84+
movu m1, [darkq + wq]
85+
movu m2, [brightq + wq]
86+
87+
; lower bound = dark - undershoot, upper bound = bright + overshoot,
; both with unsigned word saturation
psubusw m1, m3
88+
paddusw m2, m4
89+
; clamp src into [lower, upper] with unsigned word max/min
pmaxuw m0, m1
90+
pminuw m0, m2
91+
; aligned store - NOTE(review): assumes every dst row start is
; mmsize-aligned; confirm the caller guarantees this
mova [dstq + wq], m0
92+
93+
; advance one full register; when the row's byte length is not a multiple
; of mmsize the last iteration touches bytes past the row end -
; NOTE(review): relies on buffer padding, confirm
add wq, mmsize
94+
jl .loop
95+
RET

libavfilter/x86/vf_maskedclamp_init.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright (c) 2019 Paul B Mahol
3+
*
4+
* This file is part of FFmpeg.
5+
*
6+
* FFmpeg is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU Lesser General Public
8+
* License as published by the Free Software Foundation; either
9+
* version 2.1 of the License, or (at your option) any later version.
10+
*
11+
* FFmpeg is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+
* Lesser General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU Lesser General Public
17+
* License along with FFmpeg; if not, write to the Free Software
18+
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19+
*/
20+
21+
#include "libavutil/attributes.h"
22+
#include "libavutil/cpu.h"
23+
#include "libavutil/mem.h"
24+
#include "libavutil/x86/asm.h"
25+
#include "libavutil/x86/cpu.h"
26+
#include "libavfilter/maskedclamp.h"
27+
28+
/* 8-bit row kernel implemented in x86/vf_maskedclamp.asm (SSE2). */
void ff_maskedclamp8_sse2(const uint8_t *bsrc, uint8_t *dst,
29+
const uint8_t *darksrc, const uint8_t *brightsrc,
30+
int w, int undershoot, int overshoot);
31+
32+
/*
 * 16-bit row kernel implemented in x86/vf_maskedclamp.asm (SSE4).
 * The pointers are typed uint8_t to match the shared function-pointer
 * signature; the assembly treats the data as 16-bit words.
 */
void ff_maskedclamp16_sse4(const uint8_t *bsrc, uint8_t *dst,
33+
const uint8_t *darksrc, const uint8_t *brightsrc,
34+
int w, int undershoot, int overshoot);
35+
36+
/**
 * Override dsp->maskedclamp with an x86 SIMD kernel when one applies.
 *
 * @param dsp   function table to update; left unchanged if neither
 *              condition below holds
 * @param depth bits per component of the processed format
 */
av_cold void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth)
37+
{
38+
int cpu_flags = av_get_cpu_flags();
39+
40+
/* up to 8 bits per component: byte kernel, needs SSE2 */
if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) {
41+
dsp->maskedclamp = ff_maskedclamp8_sse2;
42+
}
43+
44+
/* more than 8 bits per component: word kernel, needs SSE4 */
if (EXTERNAL_SSE4(cpu_flags) && depth > 8) {
45+
dsp->maskedclamp = ff_maskedclamp16_sse4;
46+
}
47+
}

0 commit comments

Comments
 (0)