Skip to content

Turn BytesContext into FromTensorContext #721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 0 additions & 137 deletions src/torchcodec/_core/AVIOBytesContext.cpp

This file was deleted.

54 changes: 0 additions & 54 deletions src/torchcodec/_core/AVIOBytesContext.h

This file was deleted.

5 changes: 3 additions & 2 deletions src/torchcodec/_core/AVIOContextHolder.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ namespace facebook::torchcodec {
// tracks the custom behavior of reading, seeking and writing. It is
// provided upon AVIOContext creation and to the read, seek and
// write callback functions.
// While it's not required, it is natural for the derived classes to make
// all of the above members. Base classes need to call
// The callback functions do not need to be members of the derived class,
// but the derived class must have access to them. The context object must
// be a member of the derived class. Derived classes need to call
// createAVIOContext(), ideally in their constructor.
// 3. A generic handle for those that just need to manage having access to an
// AVIOContext, but aren't necessarily concerned with how it was customized:
Expand Down
121 changes: 121 additions & 0 deletions src/torchcodec/_core/AVIOTensorContext.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "src/torchcodec/_core/AVIOTensorContext.h"
#include <torch/types.h>

namespace facebook::torchcodec {

namespace {

// Number of bytes allocated up-front for the output tensor created by the
// AVIOToTensorContext constructor.
constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
// Upper bound, in bytes, on the output tensor: write() refuses to double the
// tensor when the doubled size would exceed this value.
constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB

// The signature of this function is defined by FFMPEG.
int read(void* opaque, uint8_t* buf, int buf_size) {
auto tensorContext = static_cast<detail::TensorContext*>(opaque);
TORCH_CHECK(
tensorContext->current <= tensorContext->data.numel(),
"Tried to read outside of the buffer: current=",
tensorContext->current,
", size=",
tensorContext->data.numel());

int64_t numBytesRead = std::min(
static_cast<int64_t>(buf_size),
tensorContext->data.numel() - tensorContext->current);

TORCH_CHECK(
numBytesRead >= 0,
"Tried to read negative bytes: numBytesRead=",
numBytesRead,
", size=",
tensorContext->data.numel(),
", current=",
tensorContext->current);

if (numBytesRead == 0) {
return AVERROR_EOF;
}

std::memcpy(
buf,
tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
numBytesRead);
tensorContext->current += numBytesRead;
return numBytesRead;
}

// FFmpeg write callback; the signature is imposed by FFmpeg.
//
// `opaque` is the detail::TensorContext registered with the AVIOContext.
// Copies `buf_size` bytes from `buf` into the output tensor at the context's
// current position, growing the tensor first if it is too small, then
// advances the current position by `buf_size`.
//
// Returns `buf_size`. Raises (through TORCH_CHECK) if the tensor would have
// to grow beyond MAX_TENSOR_SIZE bytes.
int write(void* opaque, const uint8_t* buf, int buf_size) {
  auto* tensorContext = static_cast<detail::TensorContext*>(opaque);

  const int64_t bufSize = static_cast<int64_t>(buf_size);
  const int64_t requiredSize = tensorContext->current + bufSize;

  // Keep doubling until the pending write fits. A single doubling is not
  // always enough: the write may need more than twice the current capacity.
  // The numel() > 0 guard prevents an endless loop on an empty tensor, whose
  // size doubling cannot increase; that case is reported by the check below.
  while (requiredSize > tensorContext->data.numel() &&
         tensorContext->data.numel() > 0) {
    TORCH_CHECK(
        tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
        "We tried to allocate an output encoded tensor larger than ",
        MAX_TENSOR_SIZE,
        " bytes. If you think this should be supported, please report.");

    // We double the size of the output tensor. Calling cat() may not be the
    // most efficient, but it's simple. The first half of the result holds the
    // bytes written so far; the second half is spare capacity.
    tensorContext->data =
        torch::cat({tensorContext->data, tensorContext->data});
  }

  TORCH_CHECK(
      requiredSize <= tensorContext->data.numel(),
      "Re-allocation of the output tensor didn't work. ",
      "This should not happen, please report on TorchCodec bug tracker");

  // Fetch the pointer only after any re-allocation: cat() returns a new
  // tensor.
  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
  std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
  tensorContext->current += bufSize;
  return buf_size;
}

// FFmpeg seek callback; the signature is imposed by FFmpeg.
//
// `opaque` is the detail::TensorContext registered with the AVIOContext.
// Supported values of `whence`:
//   - AVSEEK_SIZE: returns the number of elements in the tensor and leaves
//     the current position untouched.
//   - SEEK_SET: moves the current position to `offset` and returns `offset`.
// Any other `whence`, or a negative `offset` with SEEK_SET, returns -1 and
// leaves the current position untouched.
int64_t seek(void* opaque, int64_t offset, int whence) {
  auto* tensorContext = static_cast<detail::TensorContext*>(opaque);
  int64_t ret = -1;

  switch (whence) {
    case AVSEEK_SIZE:
      ret = tensorContext->data.numel();
      break;
    case SEEK_SET:
      // A negative position would make the read and write callbacks compute
      // a pointer before the start of the tensor's storage, so reject it.
      if (offset >= 0) {
        tensorContext->current = offset;
        ret = offset;
      }
      break;
    default:
      break;
  }

  return ret;
}

} // namespace

// Builds a read-only AVIOContext over `data`.
//
// `data` must be a non-empty, contiguous, CPU tensor of dtype uint8; any
// violation raises through TORCH_CHECK. The tensor is stored in
// tensorContext_, starting at position 0, and the read and seek callbacks are
// registered with &tensorContext_ as their opaque pointer. No write callback
// is registered.
AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
    : tensorContext_{data, 0} {
  TORCH_CHECK(data.numel() > 0, "data must not be empty");
  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
  // The read callback copies out of data_ptr() with std::memcpy, which is only
  // valid for host memory.
  TORCH_CHECK(data.device().is_cpu(), "data must be on CPU");
  createAVIOContext(&read, nullptr, &seek, &tensorContext_);
}

// Builds a write-only AVIOContext backed by a freshly allocated uint8 tensor
// of INITIAL_TENSOR_SIZE elements, with the position starting at 0. The write
// and seek callbacks are registered with &tensorContext_ as their opaque
// pointer. No read callback is registered.
AVIOToTensorContext::AVIOToTensorContext()
    : tensorContext_{
          torch::empty(
              {INITIAL_TENSOR_SIZE},
              torch::TensorOptions().dtype(torch::kUInt8)),
          0} {
  createAVIOContext(
      /*read=*/nullptr, /*write=*/&write, /*seek=*/&seek, &tensorContext_);
}

// Returns the leading part of the output tensor, from index 0 up to (but not
// including) the context's current position.
//
// NOTE(review): the length is the *current* position, which the seek callback
// can move backwards. If a muxer seeks back before finishing, bytes past the
// final position would be left out. TODO: confirm whether that can happen.
torch::Tensor AVIOToTensorContext::getOutputTensor() {
  const int64_t numBytesWritten = tensorContext_.current;
  const torch::Tensor& buffer = tensorContext_.data;
  return buffer.narrow(/*dim=*/0, /*start=*/0, /*length=*/numBytesWritten);
}

} // namespace facebook::torchcodec
43 changes: 43 additions & 0 deletions src/torchcodec/_core/AVIOTensorContext.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <torch/types.h>
#include "src/torchcodec/_core/AVIOContextHolder.h"

namespace facebook::torchcodec {

namespace detail {

// State handed to FFmpeg as the opaque pointer of the AVIO callbacks: the
// tensor holding the bytes, and the position within it.
struct TensorContext {
  // Bytes being read from (decoding) or written into (encoding).
  torch::Tensor data;
  // Offset into `data` of the next byte to read or write. Defaults to 0 so a
  // default-constructed context never carries an indeterminate position.
  int64_t current = 0;
};

} // namespace detail

// For Decoding: enables users to pass in the entire video or audio as bytes.
// Our read and seek functions then traverse the bytes in memory.
//
// The constructor requires `data` to be a non-empty, contiguous uint8 tensor
// and registers the read and seek callbacks (no write callback) on the
// AVIOContext.
class AVIOFromTensorContext : public AVIOContextHolder {
public:
explicit AVIOFromTensorContext(torch::Tensor data);

private:
// Tensor being read plus the current read position; passed to the callbacks
// as their opaque pointer.
detail::TensorContext tensorContext_;
};

// For Encoding: used to encode into an output uint8 (bytes) tensor.
//
// The constructor allocates the output tensor and registers the write and
// seek callbacks (no read callback) on the AVIOContext.
class AVIOToTensorContext : public AVIOContextHolder {
public:
explicit AVIOToTensorContext();
// Returns the output tensor narrowed to the bytes in [0, current position).
torch::Tensor getOutputTensor();

private:
// Output tensor plus the current write position; passed to the callbacks as
// their opaque pointer.
detail::TensorContext tensorContext_;
};

} // namespace facebook::torchcodec
4 changes: 2 additions & 2 deletions src/torchcodec/_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ function(make_torchcodec_libraries
set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}")
set(decoder_sources
AVIOContextHolder.cpp
AVIOBytesContext.cpp
AVIOTensorContext.cpp
FFMPEGCommon.cpp
Frame.cpp
DeviceInterface.cpp
Expand Down Expand Up @@ -102,7 +102,7 @@ function(make_torchcodec_libraries
# 2. Create libtorchcodec_custom_opsN.{ext}.
set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}")
set(custom_ops_sources
AVIOBytesContext.cpp
AVIOTensorContext.cpp
custom_ops.cpp
)
set(custom_ops_dependencies
Expand Down
2 changes: 1 addition & 1 deletion src/torchcodec/_core/Encoder.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <sstream>

#include "src/torchcodec/_core/AVIOBytesContext.h"
#include "src/torchcodec/_core/AVIOTensorContext.h"
#include "src/torchcodec/_core/Encoder.h"
#include "torch/types.h"

Expand Down
Loading
Loading