Skip to content

Commit 4219df4

Browse files
committed
Update minPtsSecondsFromScan, maxPtsSecondsFromScan, numFramesFromScan
1 parent 1a8b3f8 commit 4219df4

File tree

8 files changed

+51
-49
lines changed

8 files changed

+51
-49
lines changed

src/torchcodec/_core/Metadata.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ struct StreamMetadata {
3737
std::optional<int64_t> minPtsFromScan;
3838
std::optional<int64_t> maxPtsFromScan;
3939
// These presentation timestamps are in seconds.
40-
std::optional<double> minPtsSecondsFromScan;
41-
std::optional<double> maxPtsSecondsFromScan;
40+
std::optional<double> beginStreamSecondsFromContent;
41+
std::optional<double> endStreamFromContentSeconds;
4242
// This can be useful for index-based seeking.
43-
std::optional<int64_t> numFramesFromScan;
43+
std::optional<int64_t> numFramesFromContent;
4444

4545
// Video-only fields derived from the AVCodecContext.
4646
std::optional<int64_t> width;

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,8 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() {
241241
streamMetadata.maxPtsFromScan = std::max(
242242
streamMetadata.maxPtsFromScan.value_or(INT64_MIN),
243243
getPtsOrDts(packet) + packet->duration);
244-
streamMetadata.numFramesFromScan =
245-
streamMetadata.numFramesFromScan.value_or(0) + 1;
244+
streamMetadata.numFramesFromContent =
245+
streamMetadata.numFramesFromContent.value_or(0) + 1;
246246

247247
// Note that we set the other value in this struct, nextPts, only after
248248
// we have scanned all packets and sorted by pts.
@@ -262,15 +262,15 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() {
262262
auto& streamMetadata = containerMetadata_.allStreamMetadata[streamIndex];
263263
auto avStream = formatContext_->streams[streamIndex];
264264

265-
streamMetadata.numFramesFromScan =
265+
streamMetadata.numFramesFromContent =
266266
streamInfos_[streamIndex].allFrames.size();
267267

268268
if (streamMetadata.minPtsFromScan.has_value()) {
269-
streamMetadata.minPtsSecondsFromScan =
269+
streamMetadata.beginStreamSecondsFromContent =
270270
*streamMetadata.minPtsFromScan * av_q2d(avStream->time_base);
271271
}
272272
if (streamMetadata.maxPtsFromScan.has_value()) {
273-
streamMetadata.maxPtsSecondsFromScan =
273+
streamMetadata.endStreamFromContentSeconds =
274274
*streamMetadata.maxPtsFromScan * av_q2d(avStream->time_base);
275275
}
276276
}
@@ -1461,7 +1461,7 @@ int64_t SingleStreamDecoder::getNumFrames(
14611461
const StreamMetadata& streamMetadata) {
14621462
switch (seekMode_) {
14631463
case SeekMode::exact:
1464-
return streamMetadata.numFramesFromScan.value();
1464+
return streamMetadata.numFramesFromContent.value();
14651465
case SeekMode::approximate: {
14661466
TORCH_CHECK(
14671467
streamMetadata.numFramesFromHeader.has_value(),
@@ -1477,7 +1477,7 @@ double SingleStreamDecoder::getMinSeconds(
14771477
const StreamMetadata& streamMetadata) {
14781478
switch (seekMode_) {
14791479
case SeekMode::exact:
1480-
return streamMetadata.minPtsSecondsFromScan.value();
1480+
return streamMetadata.beginStreamSecondsFromContent.value();
14811481
case SeekMode::approximate:
14821482
return 0;
14831483
default:
@@ -1489,7 +1489,7 @@ double SingleStreamDecoder::getMaxSeconds(
14891489
const StreamMetadata& streamMetadata) {
14901490
switch (seekMode_) {
14911491
case SeekMode::exact:
1492-
return streamMetadata.maxPtsSecondsFromScan.value();
1492+
return streamMetadata.endStreamFromContentSeconds.value();
14931493
case SeekMode::approximate: {
14941494
TORCH_CHECK(
14951495
streamMetadata.durationSecondsFromHeader.has_value(),

src/torchcodec/_core/SingleStreamDecoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ class SingleStreamDecoder {
121121
//
122122
// Valid values for startSeconds and stopSeconds are:
123123
//
124-
// [minPtsSecondsFromScan, maxPtsSecondsFromScan)
124+
// [beginStreamSecondsFromContent, endStreamFromContentSeconds)
125125
FrameBatchOutput getFramesPlayedInRange(
126126
double startSeconds,
127127
double stopSeconds);

src/torchcodec/_core/_metadata.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,23 +227,25 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
227227
common_meta = dict(
228228
duration_seconds_from_header=stream_dict.get("durationSecondsFromHeader"),
229229
bit_rate=stream_dict.get("bitRate"),
230-
begin_stream_seconds_from_header=stream_dict.get("beginStreamSecondsFromHeader"),
230+
begin_stream_seconds_from_header=stream_dict.get(
231+
"beginStreamSecondsFromHeader"
232+
),
231233
codec=stream_dict.get("codec"),
232234
stream_index=stream_index,
233235
)
234236
if stream_dict["mediaType"] == "video":
235237
streams_metadata.append(
236238
VideoStreamMetadata(
237239
begin_stream_seconds_from_content=stream_dict.get(
238-
"minPtsSecondsFromScan"
240+
"beginStreamSecondsFromContent"
239241
),
240242
end_stream_seconds_from_content=stream_dict.get(
241-
"maxPtsSecondsFromScan"
243+
"endStreamFromContentSeconds"
242244
),
243245
width=stream_dict.get("width"),
244246
height=stream_dict.get("height"),
245247
num_frames_from_header=stream_dict.get("numFramesFromHeader"),
246-
num_frames_from_content=stream_dict.get("numFramesFromScan"),
248+
num_frames_from_content=stream_dict.get("numFramesFromContent"),
247249
average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
248250
**common_meta,
249251
)

src/torchcodec/_core/custom_ops.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -478,20 +478,20 @@ std::string get_json_metadata(at::Tensor& decoder) {
478478
if (maybeBestVideoStreamIndex.has_value()) {
479479
auto streamMetadata =
480480
videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex];
481-
if (streamMetadata.numFramesFromScan.has_value()) {
481+
if (streamMetadata.numFramesFromContent.has_value()) {
482482
metadataMap["numFramesFromHeader"] =
483-
std::to_string(*streamMetadata.numFramesFromScan);
483+
std::to_string(*streamMetadata.numFramesFromContent);
484484
} else if (streamMetadata.numFramesFromHeader.has_value()) {
485485
metadataMap["numFramesFromHeader"] =
486486
std::to_string(*streamMetadata.numFramesFromHeader);
487487
}
488-
if (streamMetadata.minPtsSecondsFromScan.has_value()) {
489-
metadataMap["minPtsSecondsFromScan"] =
490-
std::to_string(*streamMetadata.minPtsSecondsFromScan);
488+
if (streamMetadata.beginStreamSecondsFromContent.has_value()) {
489+
metadataMap["beginStreamSecondsFromContent"] =
490+
std::to_string(*streamMetadata.beginStreamSecondsFromContent);
491491
}
492-
if (streamMetadata.maxPtsSecondsFromScan.has_value()) {
493-
metadataMap["maxPtsSecondsFromScan"] =
494-
std::to_string(*streamMetadata.maxPtsSecondsFromScan);
492+
if (streamMetadata.endStreamFromContentSeconds.has_value()) {
493+
metadataMap["endStreamFromContentSeconds"] =
494+
std::to_string(*streamMetadata.endStreamFromContentSeconds);
495495
}
496496
if (streamMetadata.codecName.has_value()) {
497497
metadataMap["codec"] = quoteValue(streamMetadata.codecName.value());
@@ -574,9 +574,9 @@ std::string get_stream_json_metadata(
574574
if (streamMetadata.bitRate.has_value()) {
575575
map["bitRate"] = std::to_string(*streamMetadata.bitRate);
576576
}
577-
if (streamMetadata.numFramesFromScan.has_value()) {
578-
map["numFramesFromScan"] =
579-
std::to_string(*streamMetadata.numFramesFromScan);
577+
if (streamMetadata.numFramesFromContent.has_value()) {
578+
map["numFramesFromContent"] =
579+
std::to_string(*streamMetadata.numFramesFromContent);
580580
}
581581
if (streamMetadata.numFramesFromHeader.has_value()) {
582582
map["numFramesFromHeader"] =
@@ -586,13 +586,13 @@ std::string get_stream_json_metadata(
586586
map["beginStreamSecondsFromHeader"] =
587587
std::to_string(*streamMetadata.beginStreamSecondsFromHeader);
588588
}
589-
if (streamMetadata.minPtsSecondsFromScan.has_value()) {
590-
map["minPtsSecondsFromScan"] =
591-
std::to_string(*streamMetadata.minPtsSecondsFromScan);
589+
if (streamMetadata.beginStreamSecondsFromContent.has_value()) {
590+
map["beginStreamSecondsFromContent"] =
591+
std::to_string(*streamMetadata.beginStreamSecondsFromContent);
592592
}
593-
if (streamMetadata.maxPtsSecondsFromScan.has_value()) {
594-
map["maxPtsSecondsFromScan"] =
595-
std::to_string(*streamMetadata.maxPtsSecondsFromScan);
593+
if (streamMetadata.endStreamFromContentSeconds.has_value()) {
594+
map["endStreamFromContentSeconds"] =
595+
std::to_string(*streamMetadata.endStreamFromContentSeconds);
596596
}
597597
if (streamMetadata.codecName.has_value()) {
598598
map["codec"] = quoteValue(streamMetadata.codecName.value());

src/torchcodec/_samplers/video_clip_sampler.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -271,18 +271,18 @@ def _get_start_seconds(
271271
+ 1
272272
) / metadata_json["averageFpsFromHeader"]
273273

274-
minPtsSecondsFromScan = (
275-
metadata_json["minPtsSecondsFromScan"]
276-
if metadata_json["minPtsSecondsFromScan"]
274+
beginStreamSecondsFromContent = (
275+
metadata_json["beginStreamSecondsFromContent"]
276+
if metadata_json["beginStreamSecondsFromContent"]
277277
else 0
278278
)
279-
maxPtsSecondsFromScan = (
280-
metadata_json["maxPtsSecondsFromScan"]
281-
if metadata_json["maxPtsSecondsFromScan"] > 0
279+
endStreamFromContentSeconds = (
280+
metadata_json["endStreamFromContentSeconds"]
281+
if metadata_json["endStreamFromContentSeconds"] > 0
282282
else video_duration_in_seconds
283283
)
284284
last_possible_clip_start_in_seconds = (
285-
maxPtsSecondsFromScan - clip_duration_in_seconds
285+
endStreamFromContentSeconds - clip_duration_in_seconds
286286
)
287287
if last_possible_clip_start_in_seconds < 0:
288288
raise VideoTooShortException(
@@ -292,7 +292,7 @@ def _get_start_seconds(
292292
clip_starts_in_seconds: List[float] = []
293293
sample_start_second = max(
294294
time_based_sampler_args.sample_start_second,
295-
minPtsSecondsFromScan,
295+
beginStreamSecondsFromContent,
296296
)
297297
sample_end_second = min(
298298
last_possible_clip_start_in_seconds,

test/VideoDecoderTest.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,15 @@ TEST_P(SingleStreamDecoderTest, ReturnsFpsAndDurationForVideoInMetadata) {
8484
EXPECT_NEAR(*videoStream.bitRate, 128783, 1e-1);
8585
EXPECT_NEAR(*videoStream.durationSeconds, 13.013, 1e-1);
8686
EXPECT_EQ(videoStream.numFrames, 390);
87-
EXPECT_FALSE(videoStream.minPtsSecondsFromScan.has_value());
88-
EXPECT_FALSE(videoStream.maxPtsSecondsFromScan.has_value());
89-
EXPECT_FALSE(videoStream.numFramesFromScan.has_value());
87+
EXPECT_FALSE(videoStream.beginStreamSecondsFromContent.has_value());
88+
EXPECT_FALSE(videoStream.endStreamFromContentSeconds.has_value());
89+
EXPECT_FALSE(videoStream.numFramesFromContent.has_value());
9090
decoder->scanFileAndUpdateMetadataAndIndex();
9191
metadata = decoder->getContainerMetadata();
9292
const auto& videoStream1 = metadata.allStreamMetadata[3];
93-
EXPECT_EQ(*videoStream1.minPtsSecondsFromScan, 0);
94-
EXPECT_EQ(*videoStream1.maxPtsSecondsFromScan, 13.013);
95-
EXPECT_EQ(*videoStream1.numFramesFromScan, 390);
93+
EXPECT_EQ(*videoStream1.beginStreamSecondsFromContent, 0);
94+
EXPECT_EQ(*videoStream1.endStreamFromContentSeconds, 13.013);
95+
EXPECT_EQ(*videoStream1.numFramesFromContent, 390);
9696
}
9797

9898
TEST(SingleStreamDecoderTest, MissingVideoFileThrowsException) {

test/test_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ def test_video_get_json_metadata_with_stream(self):
414414
metadata_dict = json.loads(metadata)
415415
assert metadata_dict["width"] == 480
416416
assert metadata_dict["height"] == 270
417-
assert metadata_dict["minPtsSecondsFromScan"] == 0
418-
assert metadata_dict["maxPtsSecondsFromScan"] == 13.013
417+
assert metadata_dict["beginStreamSecondsFromContent"] == 0
418+
assert metadata_dict["endStreamFromContentSeconds"] == 13.013
419419

420420
def test_get_ffmpeg_version(self):
421421
ffmpeg_dict = get_ffmpeg_library_versions()

0 commit comments

Comments
 (0)