Skip to content

Commit d6ce570

Browse files
authored
VideoStreamMetadata.sample_aspect_ratio: new metadata field (#733) (#737)
1 parent ffac96c commit d6ce570

File tree

5 files changed

+31
-0
lines changed

5 files changed

+31
-0
lines changed

src/torchcodec/_core/Metadata.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
extern "C" {
1414
#include <libavcodec/avcodec.h>
1515
#include <libavutil/avutil.h>
16+
#include <libavutil/rational.h>
1617
}
1718

1819
namespace facebook::torchcodec {
@@ -45,6 +46,7 @@ struct StreamMetadata {
4546
// Video-only fields derived from the AVCodecContext.
4647
std::optional<int64_t> width;
4748
std::optional<int64_t> height;
49+
std::optional<AVRational> sampleAspectRatio;
4850

4951
// Audio-only fields
5052
std::optional<int64_t> sampleRate;

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,8 @@ void SingleStreamDecoder::addVideoStream(
454454

455455
streamMetadata.width = streamInfo.codecContext->width;
456456
streamMetadata.height = streamInfo.codecContext->height;
457+
streamMetadata.sampleAspectRatio =
458+
streamInfo.codecContext->sample_aspect_ratio;
457459
}
458460

459461
void SingleStreamDecoder::addAudioStream(

src/torchcodec/_core/_metadata.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import json
99
import pathlib
1010
from dataclasses import dataclass
11+
from fractions import Fraction
1112
from typing import List, Optional, Union
1213

1314
import torch
@@ -80,6 +81,11 @@ class VideoStreamMetadata(StreamMetadata):
8081
average_fps_from_header: Optional[float]
8182
"""Average fps of the stream, obtained from the header (float or None).
8283
We recommend using the ``average_fps`` attribute instead."""
84+
pixel_aspect_ratio: Optional[Fraction]
85+
"""Pixel Aspect Ratio (PAR), also known as Sample Aspect Ratio
86+
(SAR --- not to be confused with Storage Aspect Ratio, also SAR),
87+
is the ratio between the width and height of each pixel
88+
(``fractions.Fraction`` or None)."""
8389

8490
@property
8591
def duration_seconds(self) -> Optional[float]:
@@ -229,6 +235,16 @@ def best_audio_stream(self) -> AudioStreamMetadata:
229235
return metadata
230236

231237

238+
def _get_optional_par_fraction(stream_dict):
239+
try:
240+
return Fraction(
241+
stream_dict["sampleAspectRatioNum"],
242+
stream_dict["sampleAspectRatioDen"],
243+
)
244+
except KeyError:
245+
return None
246+
247+
232248
# TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
233249
# the "container" name in it? Same below.
234250
def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
@@ -265,6 +281,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
265281
num_frames_from_header=stream_dict.get("numFramesFromHeader"),
266282
num_frames_from_content=stream_dict.get("numFramesFromContent"),
267283
average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
284+
pixel_aspect_ratio=_get_optional_par_fraction(stream_dict),
268285
**common_meta,
269286
)
270287
)

src/torchcodec/_core/custom_ops.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,12 @@ std::string get_stream_json_metadata(
604604
if (streamMetadata.height.has_value()) {
605605
map["height"] = std::to_string(*streamMetadata.height);
606606
}
607+
if (streamMetadata.sampleAspectRatio.has_value()) {
608+
map["sampleAspectRatioNum"] =
609+
std::to_string((*streamMetadata.sampleAspectRatio).num);
610+
map["sampleAspectRatioDen"] =
611+
std::to_string((*streamMetadata.sampleAspectRatio).den);
612+
}
607613
if (streamMetadata.averageFpsFromHeader.has_value()) {
608614
map["averageFpsFromHeader"] =
609615
std::to_string(*streamMetadata.averageFpsFromHeader);

test/test_metadata.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import functools
8+
from fractions import Fraction
89

910
import pytest
1011

@@ -81,6 +82,7 @@ def test_get_metadata(metadata_getter):
8182
assert best_video_stream_metadata.begin_stream_seconds_from_header == 0
8283
assert best_video_stream_metadata.bit_rate == 128783
8384
assert best_video_stream_metadata.average_fps == pytest.approx(29.97, abs=0.001)
85+
assert best_video_stream_metadata.pixel_aspect_ratio is None
8486
assert best_video_stream_metadata.codec == "h264"
8587
assert best_video_stream_metadata.num_frames_from_content == (
8688
390 if with_scan else None
@@ -137,6 +139,7 @@ def test_num_frames_fallback(
137139
width=123,
138140
height=321,
139141
average_fps_from_header=30,
142+
pixel_aspect_ratio=Fraction(1, 1),
140143
stream_index=0,
141144
)
142145

@@ -248,6 +251,7 @@ def test_repr():
248251
num_frames_from_header: 390
249252
num_frames_from_content: 390
250253
average_fps_from_header: 29.97003
254+
pixel_aspect_ratio: 1
251255
duration_seconds: 13.013
252256
begin_stream_seconds: 0.0
253257
end_stream_seconds: 13.013

0 commit comments

Comments
 (0)