|
4 | 4 | # This source code is licensed under the BSD-style license found in the
|
5 | 5 | # LICENSE file in the root directory of this source tree.
|
6 | 6 |
|
| 7 | +import io |
7 | 8 | import os
|
8 | 9 | from functools import partial
|
9 | 10 |
|
@@ -922,6 +923,76 @@ def get_all_frames(asset, sample_rate=None, stop_seconds=None):
|
922 | 923 |
|
923 | 924 | torch.testing.assert_close(frames_downsampled_to_8000, frames_8000_native)
|
924 | 925 |
|
@pytest.mark.parametrize("buffering", (0, 1024))
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_file_like_decoding(self, buffering, device):
    """Check that decoding from a file-like object goes through its methods.

    The video file is wrapped in a counting proxy and handed to
    ``create_from_file_like``. The proxy's counters are then inspected after
    initialization, after a forward seek and after a backward seek.

    Args:
        buffering: Value forwarded to ``open(..., buffering=...)``; the test
            is run both unbuffered (0) and with a 1024-byte buffer.
        device: Device passed to ``add_video_stream`` and used when comparing
            decoded frames against the reference frames.
    """
    # Test to ensure that seeks and reads are actually going through the
    # methods on the IO object.
    #
    # Note that we do not check the number of reads in this test past the
    # initialization step. That is because the number of reads that FFmpeg
    # issues is dependent on the size of the internal buffer, the amount of
    # data per frame and the size of the video file. We can't control
    # the size of the buffer from the Python layer and we don't know the
    # amount of data per frame. We also can't know the amount of data per
    # frame from first principles, because it is data-dependent.
    class FileOpCounter(io.RawIOBase):
        """Proxy that forwards reads and seeks to a file and counts them."""

        # `open(..., "rb")` returns a raw file for buffering=0 and a buffered
        # reader otherwise, so the wrapped object is only known to be an
        # IOBase.
        def __init__(self, file: io.IOBase):
            super().__init__()
            self._file = file
            self.num_seeks = 0
            self.num_reads = 0

        def read(self, size: int = -1) -> bytes:
            self.num_reads += 1
            return self._file.read(size)

        def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
            self.num_seeks += 1
            return self._file.seek(offset, whence)

    # The context manager guarantees the underlying file is closed even when
    # one of the assertions below fails.
    with open(NASA_VIDEO.path, mode="rb", buffering=buffering) as raw_file:
        file_counter = FileOpCounter(raw_file)
        decoder = create_from_file_like(file_counter, "approximate")
        add_video_stream(decoder, device=device)

        frame0, *_ = get_next_frame(decoder)
        reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
        assert_frames_equal(frame0, reference_frame0.to(device))

        # We don't assert the actual number of reads and seeks because that is
        # dependent on both the size of the internal buffers on the C++ side
        # and how much is read during initialization. Note that we still
        # decode several frames at startup to improve metadata accuracy.
        assert file_counter.num_seeks > 0
        assert file_counter.num_reads > 0

        initialization_seeks = file_counter.num_seeks

        seek_to_pts(decoder, 12.979633)

        frame_last, *_ = get_next_frame(decoder)
        reference_frame_last = NASA_VIDEO.get_frame_data_by_index(289)
        assert_frames_equal(frame_last, reference_frame_last.to(device))

        assert file_counter.num_seeks > initialization_seeks

        last_frame_seeks = file_counter.num_seeks

        # We're smart enough to avoid seeks within key frames and our test
        # files have very few keyframes. However, we can force a seek by
        # requesting a backwards seek.
        seek_to_pts(decoder, 6.0)

        frame_time6, *_ = get_next_frame(decoder)
        reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
            INDEX_OF_FRAME_AT_6_SECONDS
        )
        assert_frames_equal(frame_time6, reference_frame_time6.to(device))

        assert file_counter.num_seeks > last_frame_seeks

        # Drop our reference to the decoder before the file is closed, as a
        # precaution against anything touching a closed handle on teardown.
        del decoder
| 995 | + |
925 | 996 |
|
# Run pytest when this file is executed directly as a script.
926 | 997 | if __name__ == "__main__":
|
927 | 998 | pytest.main()
|
0 commit comments