Skip to content

Commit 3e2eafb

Browse files
committed
feat(spec): replace rust_decimal with fastnum for 38-digit precision
Replace rust_decimal with fastnum::D128 to support the 38-digit decimal precision required by the Iceberg spec. rust_decimal supports only 28 digits of precision, which is insufficient.

Changes:
- Add the fastnum dependency; remove rust_decimal, num-bigint, and num-traits
- Add decimal_utils.rs with a compatibility layer for fastnum
- Update datum.rs and literal.rs for the new decimal API
- Update the bucket.rs and truncate.rs transforms
- Update parquet_writer.rs and arrow/schema.rs
- Remove the RUSTSEC-2026-0001 ignore from audit.toml

Closes #669
1 parent 20ce7a5 commit 3e2eafb

File tree

17 files changed

+458
-127
lines changed

17 files changed

+458
-127
lines changed

.cargo/audit.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,4 @@ ignore = [
3333
#
3434
# Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564
3535
"RUSTSEC-2025-0134",
36-
37-
# Tracked here: https://github.com/paupino/rust-decimal/issues/766
38-
"RUSTSEC-2026-0001",
3936
]

Cargo.lock

Lines changed: 43 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ mockall = "0.13.1"
9898
mockito = "1"
9999
motore-macros = "0.4.3"
100100
murmur3 = "0.5.2"
101-
num-bigint = "0.4.6"
102101
once_cell = "1.20"
103102
opendal = "0.55.0"
104103
ordered-float = "4"
@@ -110,7 +109,7 @@ rand = "0.8.5"
110109
regex = "1.11.3"
111110
reqwest = { version = "0.12.12", default-features = false, features = ["json"] }
112111
roaring = { version = "0.11" }
113-
rust_decimal = { version = "1.39", default-features = false, features = ["std"] }
112+
fastnum = { version = "0.7", default-features = false, features = ["std", "serde"] }
114113
serde = { version = "1.0.219", features = ["rc"] }
115114
serde_bytes = "0.11.17"
116115
serde_derive = "1.0.219"

bindings/python/Cargo.toml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ pyo3 = { version = "0.26", features = ["extension-module", "abi3-py310"] }
3737
iceberg-datafusion = { path = "../../crates/integrations/datafusion" }
3838
datafusion-ffi = { version = "51.0" }
3939
tokio = { version = "1.46.1", default-features = false }
40-
# Security: disable rkyv feature to avoid RUSTSEC-2026-0001 (rkyv 0.7.45 vulnerability)
41-
rust_decimal = { version = "1.39", default-features = false, features = ["std"] }
4240

4341
[profile.release]
4442
codegen-units = 1
@@ -48,5 +46,4 @@ opt-level = "z"
4846
strip = true
4947

5048
[package.metadata.cargo-machete]
51-
# rust_decimal is included to override feature flags for security (disable rkyv)
52-
ignored = ["rust_decimal"]
49+
ignored = []

crates/iceberg/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ futures = { workspace = true }
6767
itertools = { workspace = true }
6868
moka = { version = "0.12.10", features = ["future"] }
6969
murmur3 = { workspace = true }
70-
num-bigint = { workspace = true }
7170
once_cell = { workspace = true }
7271
opendal = { workspace = true }
7372
ordered-float = { workspace = true }
@@ -76,7 +75,7 @@ rand = { workspace = true }
7675
reqsign = { version = "0.16.3", optional = true, default-features = false }
7776
reqwest = { workspace = true }
7877
roaring = { workspace = true }
79-
rust_decimal = { workspace = true }
78+
fastnum = { workspace = true }
8079
serde = { workspace = true }
8180
serde_bytes = { workspace = true }
8281
serde_derive = { workspace = true }

crates/iceberg/src/arrow/schema.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,12 @@ use arrow_array::{
2727
TimestampMicrosecondArray,
2828
};
2929
use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
30-
use num_bigint::BigInt;
3130
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
3231
use parquet::file::statistics::Statistics;
33-
use rust_decimal::prelude::ToPrimitive;
3432
use uuid::Uuid;
3533

3634
use crate::error::Result;
35+
use crate::spec::decimal_utils::i128_from_be_bytes;
3736
use crate::spec::{
3837
Datum, FIRST_FIELD_ID, ListType, MapType, NestedField, NestedFieldRef, PrimitiveLiteral,
3938
PrimitiveType, Schema, SchemaVisitor, StructType, Type,
@@ -680,7 +679,8 @@ impl SchemaVisitor for ToArrowSchemaConverter {
680679
DataType::FixedSizeBinary(16),
681680
)),
682681
crate::spec::PrimitiveType::Fixed(len) => Ok(ArrowSchemaOrFieldOrType::Type(
683-
len.to_i32()
682+
i32::try_from(*len)
683+
.ok()
684684
.map(DataType::FixedSizeBinary)
685685
.unwrap_or(DataType::LargeBinary),
686686
)),
@@ -722,10 +722,10 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Arc<dyn ArrowDatum + Send
722722
Ok(Arc::new(Int64Array::new_scalar(*value)))
723723
}
724724
(PrimitiveType::Float, PrimitiveLiteral::Float(value)) => {
725-
Ok(Arc::new(Float32Array::new_scalar(value.to_f32().unwrap())))
725+
Ok(Arc::new(Float32Array::new_scalar(value.into_inner())))
726726
}
727727
(PrimitiveType::Double, PrimitiveLiteral::Double(value)) => {
728-
Ok(Arc::new(Float64Array::new_scalar(value.to_f64().unwrap())))
728+
Ok(Arc::new(Float64Array::new_scalar(value.into_inner())))
729729
}
730730
(PrimitiveType::String, PrimitiveLiteral::String(value)) => {
731731
Ok(Arc::new(StringArray::new_scalar(value.as_str())))
@@ -835,10 +835,9 @@ pub(crate) fn get_parquet_stat_min_as_datum(
835835
let Some(bytes) = stats.min_bytes_opt() else {
836836
return Ok(None);
837837
};
838-
let unscaled_value = BigInt::from_signed_bytes_be(bytes);
839838
Some(Datum::new(
840839
primitive_type.clone(),
841-
PrimitiveLiteral::Int128(unscaled_value.to_i128().ok_or_else(|| {
840+
PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| {
842841
Error::new(
843842
ErrorKind::DataInvalid,
844843
format!("Can't convert bytes to i128: {bytes:?}"),
@@ -982,10 +981,9 @@ pub(crate) fn get_parquet_stat_max_as_datum(
982981
let Some(bytes) = stats.max_bytes_opt() else {
983982
return Ok(None);
984983
};
985-
let unscaled_value = BigInt::from_signed_bytes_be(bytes);
986984
Some(Datum::new(
987985
primitive_type.clone(),
988-
PrimitiveLiteral::Int128(unscaled_value.to_i128().ok_or_else(|| {
986+
PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| {
989987
Error::new(
990988
ErrorKind::DataInvalid,
991989
format!("Can't convert bytes to i128: {bytes:?}"),
@@ -1295,9 +1293,9 @@ mod tests {
12951293
use std::sync::Arc;
12961294

12971295
use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
1298-
use rust_decimal::Decimal;
12991296

13001297
use super::*;
1298+
use crate::spec::decimal_utils::decimal_new;
13011299
use crate::spec::{Literal, Schema};
13021300

13031301
/// Create a simple field with metadata.
@@ -2127,7 +2125,7 @@ mod tests {
21272125
assert_eq!(array.value(0), 42);
21282126
}
21292127
{
2130-
let datum = Datum::decimal_with_precision(Decimal::new(123, 2), 30).unwrap();
2128+
let datum = Datum::decimal_with_precision(decimal_new(123, 2), 30).unwrap();
21312129
let arrow_datum = get_arrow_datum(&datum).unwrap();
21322130
let (array, is_scalar) = arrow_datum.get();
21332131
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();

crates/iceberg/src/error.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,12 +408,6 @@ define_from_err!(
408408
"Failed to parse json string"
409409
);
410410

411-
define_from_err!(
412-
rust_decimal::Error,
413-
ErrorKind::DataInvalid,
414-
"Failed to convert decimal literal to rust decimal"
415-
);
416-
417411
define_from_err!(
418412
parquet::errors::ParquetError,
419413
ErrorKind::Unexpected,

crates/iceberg/src/spec/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub use table_metadata::*;
5252
pub(crate) use table_metadata_builder::FIRST_FIELD_ID;
5353
pub use table_properties::*;
5454
pub use transform::*;
55+
pub(crate) use values::decimal_utils;
5556
pub use values::*;
5657
pub use view_metadata::*;
5758
pub use view_version::*;

crates/iceberg/src/spec/transform.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::str::FromStr;
2424
use fnv::FnvHashSet;
2525
use serde::{Deserialize, Deserializer, Serialize, Serializer};
2626

27+
use super::values::decimal_utils::decimal_from_i128_with_scale;
2728
use super::{Datum, PrimitiveLiteral};
2829
use crate::ErrorKind;
2930
use crate::error::{Error, Result};
@@ -660,7 +661,7 @@ impl Transform {
660661
(PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Some(Datum::int(v - 1)),
661662
(PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Some(Datum::long(v - 1)),
662663
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => {
663-
Some(Datum::decimal(v - 1)?)
664+
Some(Datum::decimal(decimal_from_i128_with_scale(v - 1, 0))?)
664665
}
665666
(PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Some(Datum::date(v - 1)),
666667
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
@@ -672,7 +673,7 @@ impl Transform {
672673
(PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Some(Datum::int(v + 1)),
673674
(PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Some(Datum::long(v + 1)),
674675
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => {
675-
Some(Datum::decimal(v + 1)?)
676+
Some(Datum::decimal(decimal_from_i128_with_scale(v + 1, 0))?)
676677
}
677678
(PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Some(Datum::date(v + 1)),
678679
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
@@ -806,7 +807,9 @@ impl Transform {
806807
match (datum.data_type(), datum.literal()) {
807808
(PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Ok(Datum::int(v + 1)),
808809
(PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Ok(Datum::long(v + 1)),
809-
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => Datum::decimal(v + 1),
810+
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => {
811+
Datum::decimal(decimal_from_i128_with_scale(v + 1, 0))
812+
}
810813
(PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Ok(Datum::date(v + 1)),
811814
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
812815
Ok(Datum::timestamp_micros(v + 1))
@@ -842,7 +845,9 @@ impl Transform {
842845
match (datum.data_type(), datum.literal()) {
843846
(PrimitiveType::Int, PrimitiveLiteral::Int(v)) => Ok(Datum::int(v - 1)),
844847
(PrimitiveType::Long, PrimitiveLiteral::Long(v)) => Ok(Datum::long(v - 1)),
845-
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => Datum::decimal(v - 1),
848+
(PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(v)) => {
849+
Datum::decimal(decimal_from_i128_with_scale(v - 1, 0))
850+
}
846851
(PrimitiveType::Date, PrimitiveLiteral::Int(v)) => Ok(Datum::date(v - 1)),
847852
(PrimitiveType::Timestamp, PrimitiveLiteral::Long(v)) => {
848853
Ok(Datum::timestamp_micros(v - 1))

0 commit comments

Comments
 (0)