namespace facebook::torchcodec {

-AVIOBytesContext::AVIOBytesContext(const void* data, int64_t dataSize)
-    : dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
-  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
-  TORCH_CHECK(dataSize > 0, "Video data size must be positive");
-  createAVIOContext(&read, nullptr, &seek, &dataContext_);
-}
+namespace {
+
+constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB
+constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
+//

// The signature of this function is defined by FFMPEG.
-int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
-  auto dataContext = static_cast<DataContext*>(opaque);
+int read(void* opaque, uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<TensorContext*>(opaque);

  TORCH_CHECK(
-      dataContext->current <= dataContext->size,
+      tensorContext->current <= tensorContext->data.numel(),
      "Tried to read outside of the buffer: current=",
-      dataContext->current,
+      tensorContext->current,
      ", size=",
-      dataContext->size);
+      tensorContext->data.numel());

  int64_t numBytesRead = std::min(
-      static_cast<int64_t>(buf_size), dataContext->size - dataContext->current);
+      static_cast<int64_t>(buf_size),
+      tensorContext->data.numel() - tensorContext->current);

  TORCH_CHECK(
      numBytesRead >= 0,
      "Tried to read negative bytes: numBytesRead=",
      numBytesRead,
      ", size=",
-      dataContext->size,
+      tensorContext->data.numel(),
      ", current=",
-      dataContext->current);
+      tensorContext->current);

  if (numBytesRead == 0) {
    return AVERROR_EOF;
  }

-  std::memcpy(buf, dataContext->data + dataContext->current, numBytesRead);
-  dataContext->current += numBytesRead;
+  std::memcpy(
+      buf,
+      tensorContext->data.data_ptr<uint8_t>() + tensorContext->current,
+      numBytesRead);
+  tensorContext->current += numBytesRead;
  return numBytesRead;
}

// The signature of this function is defined by FFMPEG.
-int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
-  auto dataContext = static_cast<DataContext*>(opaque);
-  int64_t ret = -1;
-
-  switch (whence) {
-    case AVSEEK_SIZE:
-      ret = dataContext->size;
-      break;
-    case SEEK_SET:
-      dataContext->current = offset;
-      ret = offset;
-      break;
-    default:
-      break;
-  }
-
-  return ret;
-}
-
-AVIOToTensorContext::AVIOToTensorContext()
-    : dataContext_{
-          torch::empty(
-              {AVIOToTensorContext::INITIAL_TENSOR_SIZE},
-              {torch::kUInt8}),
-          0} {
-  createAVIOContext(nullptr, &write, &seek, &dataContext_);
-}
-
-// The signature of this function is defined by FFMPEG.
-int AVIOToTensorContext::write(void* opaque, const uint8_t* buf, int buf_size) {
-  auto dataContext = static_cast<DataContext*>(opaque);
+int write(void* opaque, const uint8_t* buf, int buf_size) {
+  auto tensorContext = static_cast<TensorContext*>(opaque);

  int64_t bufSize = static_cast<int64_t>(buf_size);
-  if (dataContext->current + bufSize > dataContext->outputTensor.numel()) {
+  if (tensorContext->current + bufSize > tensorContext->data.numel()) {
    TORCH_CHECK(
-        dataContext->outputTensor.numel() * 2 <=
-            AVIOToTensorContext::MAX_TENSOR_SIZE,
+        tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
        "We tried to allocate an output encoded tensor larger than ",
-        AVIOToTensorContext::MAX_TENSOR_SIZE,
+        MAX_TENSOR_SIZE,
        " bytes. If you think this should be supported, please report.");

    // We double the size of the outpout tensor. Calling cat() may not be the
    // most efficient, but it's simple.
-    dataContext->outputTensor =
-        torch::cat({dataContext->outputTensor, dataContext->outputTensor});
+    tensorContext->data =
+        torch::cat({tensorContext->data, tensorContext->data});
  }

  TORCH_CHECK(
-      dataContext->current + bufSize <= dataContext->outputTensor.numel(),
+      tensorContext->current + bufSize <= tensorContext->data.numel(),
      "Re-allocation of the output tensor didn't work. ",
      "This should not happen, please report on TorchCodec bug tracker");

-  uint8_t* outputTensorData = dataContext->outputTensor.data_ptr<uint8_t>();
-  std::memcpy(outputTensorData + dataContext->current, buf, bufSize);
-  dataContext->current += bufSize;
+  uint8_t* outputTensorData = tensorContext->data.data_ptr<uint8_t>();
+  std::memcpy(outputTensorData + tensorContext->current, buf, bufSize);
+  tensorContext->current += bufSize;
  return buf_size;
}

// The signature of this function is defined by FFMPEG.
-// Note: This `seek()` implementation is very similar to that of
-// AVIOBytesContext. We could consider merging both classes, or do some kind of
-// refac, but this doesn't seem worth it ATM.
-int64_t AVIOToTensorContext::seek(void* opaque, int64_t offset, int whence) {
-  auto dataContext = static_cast<DataContext*>(opaque);
+int64_t seek(void* opaque, int64_t offset, int whence) {
+  auto tensorContext = static_cast<TensorContext*>(opaque);
  int64_t ret = -1;

  switch (whence) {
    case AVSEEK_SIZE:
-      ret = dataContext->outputTensor.numel();
+      ret = tensorContext->data.numel();
      break;
    case SEEK_SET:
-      dataContext->current = offset;
+      tensorContext->current = offset;
      ret = offset;
      break;
    default:
@@ -129,9 +99,24 @@ int64_t AVIOToTensorContext::seek(void* opaque, int64_t offset, int whence) {
  return ret;
}

+} // namespace
+
+AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+    : tensorContext_{data, 0} {
+  TORCH_CHECK(data.numel() > 0, "data must not be empty");
+  TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+  TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+  createAVIOContext(&read, nullptr, &seek, &tensorContext_);
+}
+
+AVIOToTensorContext::AVIOToTensorContext()
+    : tensorContext_{torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 0} {
+  createAVIOContext(nullptr, &write, &seek, &tensorContext_);
+}
+
torch::Tensor AVIOToTensorContext::getOutputTensor() {
-  return dataContext_.outputTensor.narrow(
-      /*dim=*/0, /*start=*/0, /*length=*/dataContext_.current);
+  return tensorContext_.data.narrow(
+      /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.current);
}

} // namespace facebook::torchcodec
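
For readers skimming the diff: the encoder-side buffering above boils down to one pattern, start from a fixed-size uint8 tensor, double it with torch::cat() whenever a write would overflow (bounded by MAX_TENSOR_SIZE in the PR), then narrow() down to the bytes actually written when handing the result back. The following standalone sketch reproduces that grow-and-narrow pattern with plain libtorch so it can be checked in isolation; the names (GrowableTensorBuffer, write, output) are illustrative only and are not part of this PR, and the MAX_TENSOR_SIZE guard is omitted for brevity.

// Standalone sketch of the grow-by-doubling + narrow() pattern used by the
// write()/getOutputTensor() pair above. Illustrative names, not PR code.
#include <torch/torch.h>

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

namespace {

struct GrowableTensorBuffer {
  torch::Tensor data = torch::empty({16}, torch::kUInt8); // small initial size
  int64_t current = 0; // number of bytes written so far

  void write(const uint8_t* buf, int64_t bufSize) {
    // Double the tensor (cat with itself) until the write fits, mirroring the
    // reallocation strategy in the diff (no MAX_TENSOR_SIZE cap here).
    while (current + bufSize > data.numel()) {
      data = torch::cat({data, data});
    }
    std::memcpy(data.data_ptr<uint8_t>() + current, buf, bufSize);
    current += bufSize;
  }

  torch::Tensor output() const {
    // Return only the bytes actually written, like getOutputTensor().
    return data.narrow(/*dim=*/0, /*start=*/0, /*length=*/current);
  }
};

} // namespace

int main() {
  GrowableTensorBuffer buffer;
  std::vector<uint8_t> chunk(40, 0xAB); // 40 bytes forces a couple of doublings
  buffer.write(chunk.data(), static_cast<int64_t>(chunk.size()));
  buffer.write(chunk.data(), static_cast<int64_t>(chunk.size()));
  // Expected output: 80 bytes visible, backing storage grown to 128.
  std::cout << buffer.output().numel() << " / " << buffer.data.numel() << "\n";
  return 0;
}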