@@ -1,9 +1,10 @@
 import abc
 import json
 import subprocess
+import typing
 import urllib.request
 from concurrent.futures import ThreadPoolExecutor, wait
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from itertools import product
 from pathlib import Path
 
@@ -824,6 +825,42 @@ def convert_result_to_df_item(
     return df_item
 
 
+@dataclass
+class DecoderKind:
+    display_name: str
+    kind: typing.Type[AbstractDecoder]
+    default_options: dict[str, str] = field(default_factory=dict)
+
+
+decoder_registry = {
+    "decord": DecoderKind("DecordAccurate", DecordAccurate),
+    "decord_batch": DecoderKind("DecordAccurateBatch", DecordAccurateBatch),
+    "torchcodec_core": DecoderKind("TorchCodecCore", TorchCodecCore),
+    "torchcodec_core_batch": DecoderKind("TorchCodecCoreBatch", TorchCodecCoreBatch),
+    "torchcodec_core_nonbatch": DecoderKind(
+        "TorchCodecCoreNonBatch", TorchCodecCoreNonBatch
+    ),
+    "torchcodec_core_compiled": DecoderKind(
+        "TorchCodecCoreCompiled", TorchCodecCoreCompiled
+    ),
+    "torchcodec_public": DecoderKind("TorchCodecPublic", TorchCodecPublic),
+    "torchcodec_public_nonbatch": DecoderKind(
+        "TorchCodecPublicNonBatch", TorchCodecPublicNonBatch
+    ),
+    "torchvision": DecoderKind(
+        # We don't compare against TorchVision's "pyav" backend because it
+        # doesn't support accurate seeks.
+        "TorchVision[backend=video_reader]",
+        TorchVision,
+        {"backend": "video_reader"},
+    ),
+    "torchaudio": DecoderKind("TorchAudio", TorchAudioDecoder),
+    "opencv": DecoderKind(
+        "OpenCV[backend=FFMPEG]", OpenCVDecoder, {"backend": "FFMPEG"}
+    ),
+}
+
+
 def run_benchmarks(
     decoder_dict: dict[str, AbstractDecoder],
     video_files_paths: list[Path],
@@ -856,6 +893,7 @@ def run_benchmarks(
         # The decoder items are sorted to perform and display the benchmarks in a consistent order.
         for decoder_name, decoder in sorted(decoder_dict.items(), key=lambda x: x[0]):
             print(f"video={video_file_path}, decoder={decoder_name}")
+            print(f"metadata={metadata_label}")
 
             if dataloader_parameters:
                 bp = dataloader_parameters.batch_parameters
@@ -986,3 +1024,63 @@ def run_benchmarks(
     compare = benchmark.Compare(results)
     compare.print()
     return df_data
+
+
+def verify_outputs(decoders_to_run, video_paths, num_samples):
+    from tensorcat import tensorcat
+    from torchcodec._frame import FrameBatch
+
+    # Get frames using each decoder
+    for video_file_path in video_paths:
+        metadata = get_metadata(video_file_path)
+        metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
+        print(f"{metadata_label=}")
+
+        duration = metadata.duration_seconds
+        uniform_pts_list = [i * duration / num_samples for i in range(num_samples)]
+
+        decoders_and_frames = []
+        for decoder_name, decoder in decoders_to_run.items():
+            print(f"video={video_file_path}, decoder={decoder_name}")
+
+            # Decode uniformly spaced frames
+            new_frames = decoder.decode_frames(video_file_path, uniform_pts_list)
+            if isinstance(new_frames, FrameBatch):
+                new_frames = new_frames.data
+            decoders_and_frames.append((decoder_name, new_frames))
+
+            # Alternative: decode the first n frames instead
+            # new_frames = decoder.decode_first_n_frames(video_file_path, num_samples)
+            # if isinstance(new_frames, FrameBatch):
+            #     new_frames = new_frames.data
+            # decoders_and_frames.append((decoder_name, new_frames))
+
+        if len(decoders_and_frames) == 1:
+            # Display the frames if only one decoder was passed in
+            baseline = decoders_and_frames[0]
+            for frame in baseline[1]:
+                tensorcat(frame)
+        else:
+            # Compare the frames from all decoders pairwise in a ring:
+            # decoder[-1] vs decoder[0], decoder[0] vs decoder[1], and so on.
+            # If every adjacent pair matches, all decoders transitively agree.
+            num_decoders = len(decoders_and_frames)
+            prev_decoder = decoders_and_frames[-1]
+            for i in range(num_decoders):
+                all_match = True
+                curr_decoder = decoders_and_frames[i]
+                print(f"Compare: {prev_decoder[0]} and {curr_decoder[0]}")
+                assert len(prev_decoder[1]) == len(curr_decoder[1])
+                for f1, f2 in zip(curr_decoder[1], prev_decoder[1]):
+                    # Validate that the frames are the same within a tolerance
+                    try:
+                        torch.testing.assert_close(f1, f2)
+                    except Exception as e:
+                        tensorcat(f1)
+                        tensorcat(f2)
+                        all_match = False
+                        print(
+                            f"Error while comparing {curr_decoder[0]} and {prev_decoder[0]}: {e}"
+                        )
+                        break
+                # Report the match before advancing prev_decoder, so the message
+                # names the pair that was actually compared.
+                if all_match:
+                    print(f"Results of {prev_decoder[0]} and {curr_decoder[0]} match")
+                prev_decoder = curr_decoder
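And a sketch of exercising the new `verify_outputs` path; the video path and registry keys are placeholders:

```python
# Compare two decoders frame-by-frame on one video, sampling 10 uniform timestamps.
decoders = make_decoder_dict(["torchcodec_public", "opencv"])
verify_outputs(
    decoders_to_run=decoders,
    video_paths=[Path("video.mp4")],  # placeholder test asset
    num_samples=10,
)
```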