Skip to content

Commit 8e4be4a

Browse files
authored
Merge pull request #64 from stackhpc/byte-order
Add support for byte order
2 parents db30c4a + afd80a4 commit 8e4be4a

File tree

14 files changed

+369
-153
lines changed

14 files changed

+369
-153
lines changed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "reductionist"
3-
version = "0.4.0"
3+
version = "0.5.0"
44
edition = "2021"
55
# Due to AWS SDK.
66
rust-version = "1.66.1"
@@ -26,7 +26,7 @@ maligned = "0.2.1"
2626
mime = "0.3"
2727
ndarray = "0.15"
2828
ndarray-stats = "0.5"
29-
num-traits = "0.2.15"
29+
num-traits = "0.2.16"
3030
prometheus = { version = "0.13", features = ["process"] }
3131
serde = { version = "1.0", features = ["derive"] }
3232
serde_json = "*"
@@ -47,6 +47,10 @@ criterion = { version = "0.4", features = ["html_reports"] }
4747
regex = "1"
4848
serde_test = "1.0"
4949

50+
[[bench]]
51+
name = "byte_order"
52+
harness = false
53+
5054
[[bench]]
5155
name = "shuffle"
5256
harness = false

README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ with a JSON payload of the form:
5050
// - required
5151
"dtype": "int32|int64|uint32|uint64|float32|float64",
5252
53+
// The byte order (endianness) of the data
54+
// - optional, defaults to native byte order of Reductionist server
55+
"byte_order": "big|little",
56+
5357
// The offset in bytes to use when reading data
5458
// - optional, defaults to zero
5559
"offset": 0,
@@ -99,7 +103,12 @@ with a JSON payload of the form:
99103

100104
The currently supported reducers are `max`, `min`, `sum`, `select` and `count`. All reducers return the result using the same datatype as specified in the request except for `count` which always returns the result as `int64`.
101105

102-
The proxy adds two custom headers `x-activestorage-dtype` and `x-activestrorage-shape` to the HTTP response to allow the numeric result to be reconstructed from the binary content of the response. An additional `x-activestorage-count` header is also returned which contains the number of array elements operated on while performing the requested reduction. This header is useful, for example, to calculate the mean over multiple requests where the number of items operated on may differ between chunks.
106+
The proxy returns the following headers to the HTTP response:
107+
108+
* `x-activestorage-dtype`: The data type of the data in the response payload. One of `int32`, `int64`, `uint32`, `uint64`, `float32` or `float64`.
109+
* `x-activestorage-byte-order`: The byte order of the data in the response payload. Either `big` or `little`.
110+
* `x-activestrorage-shape`: A JSON-encoded list of numbers describing the shape of the data in the response payload. May be an empty list for a scalar result.
111+
* `x-activestorage-count`: The number of non-missing array elements operated on while performing the requested reduction. This header is useful, for example, to calculate the mean over multiple requests where the number of items operated on may differ between chunks.
103112

104113
[//]: <> (TODO: No OpenAPI support yet).
105114
[//]: <> (For a running instance of the proxy server, the full OpenAPI specification is browsable as a web page at the `{proxy-address}/redoc/` endpoint or in raw JSON form at `{proxy-address}/openapi.json`.)

benches/byte_order.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/// Benchmarks for the byte order reversal implementation.
2+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
3+
use reductionist::array::{build_array_mut_from_shape, get_shape, reverse_array_byte_order};
4+
use reductionist::models::{DType, RequestData, Slice};
5+
use url::Url;
6+
7+
fn get_test_request_data() -> RequestData {
8+
RequestData {
9+
source: Url::parse("http://example.com").unwrap(),
10+
bucket: "bar".to_string(),
11+
object: "baz".to_string(),
12+
dtype: DType::Int32,
13+
byte_order: None,
14+
offset: None,
15+
size: None,
16+
shape: None,
17+
order: None,
18+
selection: None,
19+
compression: None,
20+
filters: None,
21+
missing: None,
22+
}
23+
}
24+
25+
fn criterion_benchmark(c: &mut Criterion) {
26+
for size_k in [64, 256, 1024] {
27+
let size: isize = size_k * 1024;
28+
let mut data: Vec<u32> = (0_u32..(size as u32)).collect::<Vec<u32>>();
29+
let mut request_data = get_test_request_data();
30+
request_data.dtype = DType::Uint32;
31+
let shape = get_shape(data.len(), &request_data);
32+
let mut array = build_array_mut_from_shape(shape, &mut data).unwrap();
33+
for selection in [None, Some(vec![Slice::new(size / 4, size / 2, 2)])] {
34+
let name = format!("byte_order({}, {:?})", size, selection);
35+
c.bench_function(&name, |b| {
36+
b.iter(|| {
37+
reverse_array_byte_order(black_box(&mut array), &selection);
38+
})
39+
});
40+
}
41+
}
42+
}
43+
44+
criterion_group!(benches, criterion_benchmark);
45+
criterion_main!(benches);

scripts/client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def get_args() -> argparse.Namespace:
3131
parser.add_argument("--bucket", required=True, type=str)
3232
parser.add_argument("--object", required=True, type=str)
3333
parser.add_argument("--dtype", required=True, type=str) #, choices=DTYPES) allow invalid
34+
parser.add_argument("--byte-order", type=str, choices=["big", "little"])
3435
parser.add_argument("--offset", type=int)
3536
parser.add_argument("--size", type=int)
3637
parser.add_argument("--shape", type=str)
@@ -66,6 +67,8 @@ def build_request_data(args: argparse.Namespace) -> dict:
6667
'order': args.order,
6768
'compression': args.compression,
6869
}
70+
if args.byte_order:
71+
request_data["byte_order"] = args.byte_order
6972
if args.shape:
7073
request_data["shape"] = json.loads(args.shape)
7174
if args.selection:

src/app.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::models;
77
use crate::operation;
88
use crate::operations;
99
use crate::s3_client;
10+
use crate::types::{ByteOrder, NATIVE_BYTE_ORDER};
1011
use crate::validated_json::ValidatedJson;
1112

1213
use axum::middleware;
@@ -34,6 +35,13 @@ static HEADER_DTYPE: header::HeaderName = header::HeaderName::from_static("x-act
3435
static HEADER_SHAPE: header::HeaderName = header::HeaderName::from_static("x-activestorage-shape");
3536
/// `x-activestorage-count` header definition
3637
static HEADER_COUNT: header::HeaderName = header::HeaderName::from_static("x-activestorage-count");
38+
/// `x-activestorage-byte-order` header definition
39+
static HEADER_BYTE_ORDER: header::HeaderName =
40+
header::HeaderName::from_static("x-activestorage-byte-order");
41+
const HEADER_BYTE_ORDER_VALUE: &str = match NATIVE_BYTE_ORDER {
42+
ByteOrder::Big => "big",
43+
ByteOrder::Little => "little",
44+
};
3745

3846
impl IntoResponse for models::Response {
3947
/// Convert a [crate::models::Response] into a [axum::response::Response].
@@ -47,6 +55,7 @@ impl IntoResponse for models::Response {
4755
(&HEADER_DTYPE, self.dtype.to_string().to_lowercase()),
4856
(&HEADER_SHAPE, serde_json::to_string(&self.shape).unwrap()),
4957
(&HEADER_COUNT, serde_json::to_string(&self.count).unwrap()),
58+
(&HEADER_BYTE_ORDER, HEADER_BYTE_ORDER_VALUE.to_string()),
5059
],
5160
self.body,
5261
)
@@ -159,12 +168,22 @@ async fn operation_handler<T: operation::Operation>(
159168
ValidatedJson(request_data): ValidatedJson<models::RequestData>,
160169
) -> Result<models::Response, ActiveStorageError> {
161170
let data = download_object(&auth, &request_data).await?;
162-
let data = filter_pipeline::filter_pipeline(&request_data, &data)?;
171+
let ptr = data.as_ptr();
172+
let data = filter_pipeline::filter_pipeline(&request_data, data)?;
163173
if request_data.compression.is_some() || request_data.size.is_none() {
164174
// Validate the raw uncompressed data size now that we know it.
165175
models::validate_raw_size(data.len(), request_data.dtype, &request_data.shape)?;
166176
}
167-
T::execute(&request_data, &data)
177+
if request_data.compression.is_none() && request_data.filters.is_none() {
178+
// Assert that we're using zero-copy.
179+
assert_eq!(ptr, data.as_ptr());
180+
}
181+
// Convert to a mutable vector to allow in-place byte order conversion.
182+
let ptr = data.as_ptr();
183+
let vec: Vec<u8> = data.into();
184+
// Assert that we're using zero-copy.
185+
assert_eq!(ptr, vec.as_ptr());
186+
T::execute(&request_data, vec)
168187
}
169188

170189
/// Handler for unknown operations

0 commit comments

Comments
 (0)