Skip to content

Commit 11b4703

Browse files
cgisquet authored and michaelni committed
huffyuvdec: implement trick
When the joint table does not contain a valid entry, the decoding restarts from scratch. By implementing the trick of jumping to the 2nd level of the individual table (and inlining the whole lookup), a speed improvement of 5-10% is possible.

On a 1000-frame YUV4:2:0 video:

Before:
362851 decicycles in 422, 262094 runs, 50 skips
182488 decicycles in gray, 262087 runs, 57 skips
Object size: 23584
Overall time: 8.377

After:
346800 decicycles in 422, 262079 runs, 65 skips
168197 decicycles in gray, 262077 runs, 67 skips
Object size: 23188
Overall time: 7.878

Signed-off-by: Michael Niedermayer <[email protected]>
1 parent c7e5462 commit 11b4703

File tree

1 file changed

+57
-14
lines changed

1 file changed

+57
-14
lines changed

libavcodec/huffyuvdec.c

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -559,23 +559,62 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
559559
return 0;
560560
}
561561

562-
/* TODO instead of restarting the read when the code isn't in the first level
563-
* of the joint table, jump into the 2nd level of the individual table. */
564-
/*
 * Pre-commit form of READ_2PIX (these are the removed lines of the diff).
 *
 * Reads one code from the joint table s->vlc[4+plane1] with get_vlc2().
 * If the code is not the 0xffff marker, dst0 receives code>>8 and dst1
 * receives code. Otherwise both values are read again, one after the
 * other, from the individual tables s->vlc[0] and s->vlc[plane1].
 *
 * Uses the variable `s` from the scope where the macro is expanded.
 */
#define READ_2PIX(dst0, dst1, plane1){\
565-
uint16_t code = get_vlc2(&s->gb, s->vlc[4+plane1].table, VLC_BITS, 1);\
566-
if(code != 0xffff){\
567-
dst0 = code>>8;\
568-
dst1 = code;\
569-
}else{\
570-
dst0 = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);\
571-
dst1 = get_vlc2(&s->gb, s->vlc[plane1].table, VLC_BITS, 3);\
572-
}\
573-
}
562+
/**
 * Decode one symbol from a (possibly multi-level) VLC table, starting from
 * a first-level index that has already been computed.
 *
 * This macro is a code fragment, not a self-contained expression. It reads
 * and writes the variables `index`, `code`, `n` and `nb_bits`, all of which
 * must be declared in the scope where it is expanded, and `index` must
 * already hold the first-level lookup index on entry.
 *
 * A table entry is read as table[index][0] (stored in `code`) and
 * table[index][1] (stored in `n`). A negative `n` means "descend": the bits
 * used so far are skipped, the cache is refreshed, and the next index is
 * SHOW_UBITS(-n bits) + code. At most max_depth levels are visited.
 *
 * @param dst        lvalue receiving the final `code`
 * @param table      table indexed as table[index][0..1]
 * @param gb         bit reader, forwarded to the reader macros
 * @param name       reader-state name, forwarded to the reader macros
 * @param bits       number of bits that produced the first-level index
 * @param max_depth  maximum number of table levels to walk (1 to 3)
 *
 * Wrapped in do { } while (0) so that it acts as a single statement; the
 * caller supplies the trailing semicolon.
 */
#define DUAL_INTERN(dst, table, gb, name, bits, max_depth)          \
    do {                                                            \
        code = (table)[index][0];                                   \
        n    = (table)[index][1];                                   \
        if ((max_depth) > 1 && n < 0) {                             \
            /* first level points at a sub-table: descend */        \
            LAST_SKIP_BITS(name, gb, bits);                         \
            UPDATE_CACHE(name, gb);                                 \
                                                                    \
            nb_bits = -n;                                           \
            index   = SHOW_UBITS(name, gb, nb_bits) + code;         \
            code    = (table)[index][0];                            \
            n       = (table)[index][1];                            \
            if ((max_depth) > 2 && n < 0) {                         \
                /* second level points at a third one */            \
                LAST_SKIP_BITS(name, gb, nb_bits);                  \
                UPDATE_CACHE(name, gb);                             \
                                                                    \
                nb_bits = -n;                                       \
                index   = SHOW_UBITS(name, gb, nb_bits) + code;     \
                code    = (table)[index][0];                        \
                n       = (table)[index][1];                        \
            }                                                       \
        }                                                           \
        (dst) = code;                                               \
        /* consume the bits of the last level that was read */      \
        LAST_SKIP_BITS(name, gb, n);                                \
    } while (0)
585+
586+
587+
/**
 * Decode two symbols at once, trying a joint table first.
 *
 * The next `bits` bits are looked up in dtable. If the entry's code differs
 * from rsvd and its length n is positive, the entry is a joint hit: dst0
 * receives code >> 8, dst1 receives code (any narrowing is done by the
 * assignment to dst1), and n bits are consumed.
 *
 * Otherwise the two symbols are decoded one after the other with
 * DUAL_INTERN: dst0 from table1, reusing the index already computed for the
 * joint lookup, then (after refreshing the cache and recomputing the index)
 * dst1 from table2.
 *
 * @param dst0, dst1  lvalues receiving the two decoded values
 * @param name        reader-state name, forwarded to the reader macros
 * @param gb          bit reader, forwarded to the reader macros
 * @param dtable      joint table, indexed as dtable[index][0..1]
 * @param table1      individual table used for dst0 on a joint miss
 * @param table2      individual table used for dst1 on a joint miss
 * @param bits        number of bits used for the first-level index
 * @param max_depth   maximum table depth, passed to DUAL_INTERN
 * @param rsvd        code value marking an unusable joint entry
 */
#define GET_VLC_DUAL(dst0, dst1, name, gb, dtable, table1, table2,  \
                     bits, max_depth, rsvd)                         \
    do {                                                            \
        unsigned int index = SHOW_UBITS(name, gb, bits);            \
        int          code  = (dtable)[index][0];                    \
        int          n     = (dtable)[index][1];                    \
                                                                    \
        if (code != (rsvd) && n > 0) {                              \
            (dst0) = code >> 8;                                     \
            (dst1) = code;                                          \
            LAST_SKIP_BITS(name, gb, n);                            \
        } else {                                                    \
            int nb_bits;                                            \
            DUAL_INTERN(dst0, table1, gb, name, bits, max_depth);   \
                                                                    \
            /* refresh the reader named by the caller, not a */     \
            /* hard-coded one, before the second lookup */          \
            UPDATE_CACHE(name, gb);                                 \
            index = SHOW_UBITS(name, gb, bits);                     \
            DUAL_INTERN(dst1, table2, gb, name, bits, max_depth);   \
        }                                                           \
    } while (0)
607+
608+
/**
 * Read two values with GET_VLC_DUAL: the joint table is s->vlc[4 + plane1],
 * the individual tables are s->vlc[0] (for dst0) and s->vlc[plane1] (for
 * dst1), looked up with VLC_BITS bits, a maximum depth of 3 and 0xffff as
 * the reserved joint code.
 *
 * Uses the variable `s` and the bit reader named `re` from the scope where
 * the macro is expanded; the cache of `re` is refreshed before decoding.
 *
 * Wrapped in do { } while (0) so that the cache refresh and the decode stay
 * together as one statement; the caller supplies the trailing semicolon.
 */
#define READ_2PIX(dst0, dst1, plane1)                                     \
    do {                                                                  \
        UPDATE_CACHE(re, &s->gb);                                         \
        GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4 + (plane1)].table,  \
                     s->vlc[0].table, s->vlc[plane1].table,               \
                     VLC_BITS, 3, 0xffff);                                \
    } while (0)
574613

575614
static void decode_422_bitstream(HYuvContext *s, int count)
576615
{
577616
int i;
578-
617+
OPEN_READER(re, &s->gb);
579618
count /= 2;
580619

581620
if (count >= (get_bits_left(&s->gb)) / (31 * 4)) {
@@ -592,8 +631,11 @@ static void decode_422_bitstream(HYuvContext *s, int count)
592631
READ_2PIX(s->temp[0][2 * i + 1], s->temp[2][i], 2);
593632
}
594633
}
634+
CLOSE_READER(re, &s->gb);
595635
}
596636

637+
/* TODO instead of restarting the read when the code isn't in the first level
638+
* of the joint table, jump into the 2nd level of the individual table. */
597639
#define READ_2PIX_PLANE(dst0, dst1, plane){\
598640
uint16_t code = get_vlc2(&s->gb, s->vlc[4+plane].table, VLC_BITS, 1);\
599641
if(code != 0xffff){\
@@ -663,7 +705,7 @@ static void decode_plane_bitstream(HYuvContext *s, int count, int plane)
663705
static void decode_gray_bitstream(HYuvContext *s, int count)
664706
{
665707
int i;
666-
708+
OPEN_READER(re, &s->gb);
667709
count/=2;
668710

669711
if (count >= (get_bits_left(&s->gb)) / (31 * 2)) {
@@ -675,6 +717,7 @@ static void decode_gray_bitstream(HYuvContext *s, int count)
675717
READ_2PIX(s->temp[0][2 * i], s->temp[0][2 * i + 1], 0);
676718
}
677719
}
720+
CLOSE_READER(re, &s->gb);
678721
}
679722

680723
static av_always_inline void decode_bgr_1(HYuvContext *s, int count,

0 commit comments

Comments
 (0)