Update C++ metadata names to match Python

danielflores3 · Dan-Flores · commit a60968ce4d6b · 2025-05-30T06:22:28.000-07:00
diff --git a/src/torchcodec/_core/Metadata.h b/src/torchcodec/_core/Metadata.h
@@ -25,9 +25,9 @@ struct StreamMetadata {
   AVMediaType mediaType;
   std::optional<AVCodecID> codecId;
   std::optional<std::string> codecName;
-  std::optional<double> durationSeconds;
+  std::optional<double> durationSecondsFromHeader;
   std::optional<double> beginStreamFromHeader;
-  std::optional<int64_t> numFrames;
+  std::optional<int64_t> numFramesFromHeader;
   std::optional<int64_t> numKeyFrames;
   std::optional<double> averageFps;
   std::optional<double> bitRate;
@@ -58,7 +58,7 @@ struct ContainerMetadata {
   int numVideoStreams = 0;
   // Note that this is the container-level duration, which is usually the max
   // of all stream durations available in the container.
-  std::optional<double> durationSeconds;
+  std::optional<double> durationSecondsFromHeader;
   // Total BitRate level information at the container level in bit/s
   std::optional<double> bitRate;
   // If set, this is the index to the default audio stream.
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -125,11 +125,11 @@ void SingleStreamDecoder::initializeDecoder() {
 
     int64_t frameCount = avStream->nb_frames;
     if (frameCount > 0) {
-      streamMetadata.numFrames = frameCount;
+      streamMetadata.numFramesFromHeader = frameCount;
     }
 
     if (avStream->duration > 0 && avStream->time_base.den > 0) {
-      streamMetadata.durationSeconds =
+      streamMetadata.durationSecondsFromHeader =
           av_q2d(avStream->time_base) * avStream->duration;
     }
     if (avStream->start_time != AV_NOPTS_VALUE) {
@@ -163,7 +163,7 @@ void SingleStreamDecoder::initializeDecoder() {
 
   if (formatContext_->duration > 0) {
     AVRational defaultTimeBase{1, AV_TIME_BASE};
-    containerMetadata_.durationSeconds =
+    containerMetadata_.durationSecondsFromHeader =
         ptsToSeconds(formatContext_->duration, defaultTimeBase);
   }
 
@@ -1463,9 +1463,9 @@ int64_t SingleStreamDecoder::getNumFrames(
       return streamMetadata.numFramesFromScan.value();
     case SeekMode::approximate: {
       TORCH_CHECK(
-          streamMetadata.numFrames.has_value(),
+          streamMetadata.numFramesFromHeader.has_value(),
           "Cannot use approximate mode since we couldn't find the number of frames from the metadata.");
-      return streamMetadata.numFrames.value();
+      return streamMetadata.numFramesFromHeader.value();
     }
     default:
       throw std::runtime_error("Unknown SeekMode");
@@ -1491,9 +1491,9 @@ double SingleStreamDecoder::getMaxSeconds(
       return streamMetadata.maxPtsSecondsFromScan.value();
     case SeekMode::approximate: {
       TORCH_CHECK(
-          streamMetadata.durationSeconds.has_value(),
+          streamMetadata.durationSecondsFromHeader.has_value(),
           "Cannot use approximate mode since we couldn't find the duration from the metadata.");
-      return streamMetadata.durationSeconds.value();
+      return streamMetadata.durationSecondsFromHeader.value();
     }
     default:
       throw std::runtime_error("Unknown SeekMode");
diff --git a/src/torchcodec/_core/_metadata.py b/src/torchcodec/_core/_metadata.py
@@ -225,9 +225,9 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
     for stream_index in range(container_dict["numStreams"]):
         stream_dict = json.loads(_get_stream_json_metadata(decoder, stream_index))
         common_meta = dict(
-            duration_seconds_from_header=stream_dict.get("durationSeconds"),
+            duration_seconds_from_header=stream_dict.get("durationSecondsFromHeader"),
             bit_rate=stream_dict.get("bitRate"),
-            begin_stream_seconds_from_header=stream_dict.get("beginStreamFromHeader"),
+            begin_stream_seconds_from_header=stream_dict.get("beginStreamSecondsFromHeader"),
             codec=stream_dict.get("codec"),
             stream_index=stream_index,
         )
@@ -242,9 +242,9 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
                     ),
                     width=stream_dict.get("width"),
                     height=stream_dict.get("height"),
-                    num_frames_from_header=stream_dict.get("numFrames"),
+                    num_frames_from_header=stream_dict.get("numFramesFromHeader"),
                     num_frames_from_content=stream_dict.get("numFramesFromScan"),
-                    average_fps_from_header=stream_dict.get("averageFps"),
+                    average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
                     **common_meta,
                 )
             )
@@ -264,7 +264,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
             streams_metadata.append(StreamMetadata(**common_meta))
 
     return ContainerMetadata(
-        duration_seconds_from_header=container_dict.get("durationSeconds"),
+        duration_seconds_from_header=container_dict.get("durationSecondsFromHeader"),
         bit_rate_from_header=container_dict.get("bitRate"),
         best_video_stream_index=container_dict.get("bestVideoStreamIndex"),
         best_audio_stream_index=container_dict.get("bestAudioStreamIndex"),
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -456,18 +456,18 @@ std::string get_json_metadata(at::Tensor& decoder) {
 
   std::map<std::string, std::string> metadataMap;
   // serialize the metadata into a string std::stringstream ss;
-  double durationSeconds = 0;
+  double durationSecondsFromHeader = 0;
   if (maybeBestVideoStreamIndex.has_value() &&
       videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex]
-          .durationSeconds.has_value()) {
-    durationSeconds =
+          .durationSecondsFromHeader.has_value()) {
+            durationSecondsFromHeader =
         videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex]
-            .durationSeconds.value_or(0);
+            .durationSecondsFromHeader.value_or(0);
   } else {
     // Fallback to container-level duration if stream duration is not found.
-    durationSeconds = videoMetadata.durationSeconds.value_or(0);
+    durationSecondsFromHeader = videoMetadata.durationSecondsFromHeader.value_or(0);
   }
-  metadataMap["durationSeconds"] = std::to_string(durationSeconds);
+  metadataMap["durationSecondsFromHeader"] = std::to_string(durationSecondsFromHeader);
 
   if (videoMetadata.bitRate.has_value()) {
     metadataMap["bitRate"] = std::to_string(videoMetadata.bitRate.value());
@@ -523,8 +523,8 @@ std::string get_container_json_metadata(at::Tensor& decoder) {
 
   std::map<std::string, std::string> map;
 
-  if (containerMetadata.durationSeconds.has_value()) {
-    map["durationSeconds"] = std::to_string(*containerMetadata.durationSeconds);
+  if (containerMetadata.durationSecondsFromHeader.has_value()) {
+    map["durationSecondsFromHeader"] = std::to_string(*containerMetadata.durationSecondsFromHeader);
   }
 
   if (containerMetadata.bitRate.has_value()) {
@@ -562,8 +562,8 @@ std::string get_stream_json_metadata(
 
   std::map<std::string, std::string> map;
 
-  if (streamMetadata.durationSeconds.has_value()) {
-    map["durationSeconds"] = std::to_string(*streamMetadata.durationSeconds);
+  if (streamMetadata.durationSecondsFromHeader.has_value()) {
+    map["durationSecondsFromHeader"] = std::to_string(*streamMetadata.durationSecondsFromHeader);
   }
   if (streamMetadata.bitRate.has_value()) {
     map["bitRate"] = std::to_string(*streamMetadata.bitRate);
@@ -572,11 +572,11 @@ std::string get_stream_json_metadata(
     map["numFramesFromScan"] =
         std::to_string(*streamMetadata.numFramesFromScan);
   }
-  if (streamMetadata.numFrames.has_value()) {
-    map["numFrames"] = std::to_string(*streamMetadata.numFrames);
+  if (streamMetadata.numFramesFromHeader.has_value()) {
+    map["numFramesFromHeader"] = std::to_string(*streamMetadata.numFramesFromHeader);
   }
   if (streamMetadata.beginStreamFromHeader.has_value()) {
-    map["beginStreamFromHeader"] =
+    map["beginStreamSecondsFromHeader"] =
         std::to_string(*streamMetadata.beginStreamFromHeader);
   }
   if (streamMetadata.minPtsSecondsFromScan.has_value()) {
@@ -597,7 +597,7 @@ std::string get_stream_json_metadata(
     map["height"] = std::to_string(*streamMetadata.height);
   }
   if (streamMetadata.averageFps.has_value()) {
-    map["averageFps"] = std::to_string(*streamMetadata.averageFps);
+    map["averageFpsFromHeader"] = std::to_string(*streamMetadata.averageFps);
   }
   if (streamMetadata.sampleRate.has_value()) {
     map["sampleRate"] = std::to_string(*streamMetadata.sampleRate);