pytorch · scotts · Jun 11, 2025 · Jun 10, 2025 · Jun 11, 2025 · Jun 11, 2025
diff --git a/src/torchcodec/_core/AVIOBytesContext.cpp b/src/torchcodec/_core/AVIOBytesContext.cpp
diff --git a/src/torchcodec/_core/AVIOBytesContext.h b/src/torchcodec/_core/AVIOBytesContext.h
diff --git a/src/torchcodec/_core/AVIOContextHolder.h b/src/torchcodec/_core/AVIOContextHolder.h
@@ -27,8 +27,9 @@ namespace facebook::torchcodec {
 //           tracks the custom behavior of reading, seeking and writing. It is
 //           provided upon AVIOContext creation and to the read, seek and
 //           write callback functions.
-//      While it's not required, it is natural for the derived classes to make
-//      all of the above members. Base classes need to call
+//      The callback functions do not need to be members of the derived class,
+//      but the derived class must have access to them. The context object must
+//      be a member of the derived class. Derived classes need to call
 //      createAVIOContext(), ideally in their constructor.
 //  3. A generic handle for those that just need to manage having access to an
 //     AVIOContext, but aren't necessarily concerned with how it was customized:

diff --git a/src/torchcodec/_core/AVIOTensorContext.cpp b/src/torchcodec/_core/AVIOTensorContext.cpp
@@ -0,0 +1,121 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/_core/AVIOTensorContext.h"
+#include <torch/types.h>
+
+namespace facebook::torchcodec {
+
+namespace {
+
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB, size of the output tensor at creation
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB, upper bound when growing the output tensor
+
+// AVIOContext read callback; the signature is defined by FFMPEG.
+//
+// Copies up to buf_size bytes from the tensor held by the TensorContext behind
+// `opaque` into `buf`, starting at the context's current position, and then
+// advances that position by the number of bytes copied. Returns the number of
+// bytes copied, or AVERROR_EOF when no bytes are left. Raises through
+// TORCH_CHECK when the current position lies outside of the tensor.
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  // The position can be moved by the seek callback, so both bounds must be
+  // validated: a negative position would make memcpy() below read from before
+  // the start of the tensor's data.
+  TORCH_CHECK(
+      tensorContext->current >= 0 &&
+          tensorContext->current <= tensorContext->data.numel(),
+      "Tried to read outside of the buffer: current=",
+      tensorContext->current,
+      ", size=",
+      tensorContext->data.numel());
+
+  int64_t numBytesRead = std::min(
+      static_cast<int64_t>(buf_size),
+      tensorContext->data.numel() - tensorContext->current);
+
+  TORCH_CHECK(
+      numBytesRead >= 0,
+      "Tried to read negative bytes: numBytesRead=",
+      numBytesRead,
+      ", size=",
+      tensorContext->data.numel(),
+      ", current=",
+      tensorContext->current);
+
+  if (numBytesRead == 0) {
+    return AVERROR_EOF;
+  }
+
+  std::memcpy(
+      buf,
+      tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
+      numBytesRead);
+  tensorContext->current += numBytesRead;
+  // numBytesRead is at most buf_size, so it always fits in an int.
+  return static_cast<int>(numBytesRead);
+}
+
+// AVIOContext write callback; the signature is defined by FFMPEG.
+//
+// Copies buf_size bytes from `buf` into the output tensor held by the
+// TensorContext behind `opaque`, starting at the context's current position,
+// and then advances that position by buf_size. The tensor is doubled in size
+// as many times as needed to fit the incoming bytes; growing past
+// MAX_TENSOR_SIZE raises through TORCH_CHECK. Returns buf_size.
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+
+  // The position can be moved by the seek callback; a negative position would
+  // make memcpy() below write before the start of the tensor's data.
+  TORCH_CHECK(
+      tensorContext->current >= 0,
+      "Tried to write outside of the buffer: current=",
+      tensorContext->current);
+
+  int64_t bufSize = static_cast<int64_t>(buf_size);
+  // A single doubling is not guaranteed to make enough room, so keep growing
+  // until the incoming bytes fit.
+  while (tensorContext->current + bufSize > tensorContext->data.numel()) {
+    const int64_t previousSize = tensorContext->data.numel();
+    TORCH_CHECK(
+        previousSize * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+
+    // We double the size of the output tensor. Calling cat() may not be the
+    // most efficient, but it's simple.
+    tensorContext->data =
+        torch::cat({tensorContext->data, tensorContext->data});
+
+    // Guarantees that the loop makes progress on every iteration.
+    TORCH_CHECK(
+        tensorContext->data.numel() > previousSize,
+        "Re-allocation of the output tensor didn't work. ",
+        "This should not happen, please report on TorchCodec bug tracker");
+  }
+
+  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
+  std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
+  tensorContext->current += bufSize;
+  return buf_size;
+}
+
+// AVIOContext seek callback; the signature is defined by FFMPEG.
+//
+// Handles two values of `whence`:
+//   - AVSEEK_SIZE: returns the number of elements of the tensor held by the
+//     TensorContext behind `opaque`; the current position is left untouched.
+//   - SEEK_SET: moves the current position to `offset` and returns `offset`.
+// Returns -1 for any other value of `whence`, and for negative offsets.
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<detail::TensorContext*>(opaque);
+  int64_t ret = -1;
+
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = tensorContext->data.numel();
+      break;
+    case SEEK_SET:
+      // Never store a negative position: the read and write callbacks use it
+      // as an offset from the start of the tensor's data when calling
+      // memcpy(). The return value stays negative, so the failure is still
+      // reported to the caller.
+      if (offset >= 0) {
+        tensorContext->current = offset;
+        ret = offset;
+      }
+      break;
+    default:
+      break;
+  }
+
+  return ret;
+}
+
+} // namespace
+
+// Stores `data` with a starting position of 0, validates it, and creates the
+// AVIOContext with the read and seek callbacks. `data` must be a non-empty,
+// contiguous, kUInt8 tensor.
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0} {
+  // The stored tensor is the same tensor as `data`, so checking it is
+  // equivalent to checking the argument.
+  const torch::Tensor& input = tensorContext_.data;
+  TORCH_CHECK(input.numel() > 0, "data must not be empty");
+  TORCH_CHECK(input.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(input.scalar_type() == torch::kUInt8, "data must be kUInt8");
+
+  // nullptr is passed in place of a write callback.
+  createAVIOContext(&read, nullptr, &seek, &tensorContext_);
+}
+
+// Allocates a kUInt8 output tensor of INITIAL_TENSOR_SIZE elements with a
+// starting position of 0, and creates the AVIOContext with the write and seek
+// callbacks. nullptr is passed in place of a read callback.
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{
+          torch::empty({INITIAL_TENSOR_SIZE}, torch::kUInt8),
+          /*current=*/0} {
+  createAVIOContext(nullptr, &write, &seek, &tensorContext_);
+}
+
+// Returns the first `current` elements of the output tensor, i.e. the range
+// [0, current) along dimension 0, obtained through narrow().
+torch::Tensor AVIOToTensorContext::getOutputTensor() {
+  const int64_t length = tensorContext_.current;
+  return tensorContext_.data.narrow(/*dim=*/0, /*start=*/0, length);
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/AVIOTensorContext.h b/src/torchcodec/_core/AVIOTensorContext.h
@@ -0,0 +1,43 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <torch/types.h>
+#include "src/torchcodec/_core/AVIOContextHolder.h"
+
+namespace facebook::torchcodec {
+
+namespace detail {
+
+// State shared with the FFMPEG read, write and seek callbacks through their
+// `opaque` pointer.
+struct TensorContext {
+  // The bytes being read from, or written to.
+  torch::Tensor data;
+  // Position in `data` of the next byte to read or write. Defaults to 0 so
+  // that a default-initialized context never holds an indeterminate position;
+  // the struct remains an aggregate, so `{tensor, position}` still works.
+  int64_t current = 0;
+};
+
+} // namespace detail
+
+// For Decoding: users pass the entire video or audio as a non-empty, contiguous
+// kUInt8 tensor; our read and seek functions then traverse the bytes in memory.
+class AVIOFromTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOFromTensorContext(torch::Tensor data); // validates data
+
+ private:
+  detail::TensorContext tensorContext_; // input bytes + current position
+};
+
+// For Encoding: the write callback fills an output uint8 (bytes) tensor.
+class AVIOToTensorContext : public AVIOContextHolder {
+ public:
+  explicit AVIOToTensorContext();
+  torch::Tensor getOutputTensor(); // elements [0, current position)
+
+ private:
+  detail::TensorContext tensorContext_; // output bytes + current position
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/CMakeLists.txt b/src/torchcodec/_core/CMakeLists.txt
@@ -65,7 +65,7 @@ function(make_torchcodec_libraries
     set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
     set(decoder_sources
         AVIOContextHolder.cpp
-        AVIOBytesContext.cpp
+        AVIOTensorContext.cpp
         FFMPEGCommon.cpp
         Frame.cpp
         DeviceInterface.cpp
@@ -102,7 +102,7 @@ function(make_torchcodec_libraries
     # 2. Create libtorchcodec_custom_opsN.{ext}.
     set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
     set(custom_ops_sources
-        AVIOBytesContext.cpp
+        AVIOTensorContext.cpp
         custom_ops.cpp
     )
     set(custom_ops_dependencies

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -1,6 +1,6 @@
 #include <sstream>
 
-#include "src/torchcodec/_core/AVIOBytesContext.h"
+#include "src/torchcodec/_core/AVIOTensorContext.h"
 #include "src/torchcodec/_core/Encoder.h"
 #include "torch/types.h"