Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions components/experimental/src/measure/measureunit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::{provider::single_unit::SingleUnit, single_unit_vec::SingleUnitVec};
use alloc::string::String;
use core::fmt::Write;

// TODO NOTE: the MeasureUnitParser takes the trie and the ConverterFactory takes the full payload and an instance of MeasureUnitParser.
/// The [`MeasureUnit`] struct represents a processed CLDR compound unit.
Expand Down Expand Up @@ -57,4 +59,109 @@ impl MeasureUnit {
pub fn constant_denominator(&self) -> u64 {
// Plain accessor: hands back the stored `constant_denominator` field unchanged.
self.constant_denominator
}

/// Returns a short representation of this measure unit as follows:
/// 1. Each single unit will be represented by its short representation.
/// 2. The constant denominator, if non-zero, will be placed at the beginning of the short representation as its value prefixed with `C`.
///    2.1 If the constant denominator has more than 3 trailing zeros, it will be written in scientific notation (e.g. `C1E9`, `C24E10`).
///
/// # Examples
///
/// ```
/// use icu_experimental::measure::parser::MeasureUnitParser;
/// use icu_experimental::measure::measureunit::MeasureUnit;
///
///
/// let parser = MeasureUnitParser::new();
///
/// let measure_unit = parser.try_from_str("meter").unwrap();
/// let short_representation = measure_unit.generate_short_representation();
/// assert_eq!(short_representation, "I85", "{}", "meter");
///
///
/// let measure_unit = parser.try_from_str("square-meter").unwrap();
/// let short_representation = measure_unit.generate_short_representation();
/// assert_eq!(short_representation, "P2I85", "{}", "square-meter");
///
///
/// let measure_unit = parser.try_from_str("liter-per-100-kilometer").unwrap();
/// let short_representation = measure_unit.generate_short_representation();
/// assert_eq!(short_representation, "C100I82P-1D3I85", "{}", "liter-per-100-kilometer");
/// ```
pub fn generate_short_representation(&self) -> String {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this actually intended to be a public API?

Copy link
Member Author

@younies younies Jun 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, yes. Once we start using it internally, it will be private.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what requires this to be public now? I'm worried that this will be forgotten and accidentally made public

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need it in datagen.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how will you make it private if it's used in datagen? should it be in provider?

Copy link
Member Author

@younies younies Jul 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should have a field for the short unit id, but it is okay, the fn can be public for now

// Splits `n` into the value left after stripping every trailing decimal zero and the
// number of zeros that were stripped, so that `n == significant * 10^zeros`.
//
// Returns `(significant_digits, zeros_count)`. Zero has no significant digits to
// isolate, so it is returned as `(0, 0)`.
fn decompose_number_and_trailing_zeros(mut n: u64) -> (u64, u8) {
    // Zero is divisible by every divisor below, so without this guard the stripping
    // loop would never make progress (and would overflow the `u8` counter).
    if n == 0 {
        return (0, 0);
    }

    let mut zeros_count = 0;

    // Strip zeros in large chunks first so that long runs need only a few divisions.
    // Each `while` loop drains its chunk size before moving on to a smaller one.
    for (divisor, zeros) in [(100_000_000, 8), (10_000, 4), (100, 2), (10, 1)] {
        while n % divisor == 0 {
            n /= divisor;
            zeros_count += zeros;
        }
    }

    (n, zeros_count)
}

// Appends `input` to `buff`, switching to the compact `<digits>E<zeros>` form when the
// value ends in more than 3 decimal zeros (e.g. `1E9`, `24E10`); every other value is
// appended as plain decimal digits.
fn append_power_of_10_to_scientific(input: u64, buff: &mut String) {
    // Anything below 1000 cannot carry more than 3 trailing zeros, so the
    // decomposition is only attempted for larger values.
    if input >= 1000 {
        let (significant_digits, zeros_count) = decompose_number_and_trailing_zeros(input);

        if zeros_count > 3 {
            let _infallible = write!(buff, "{significant_digits}E{zeros_count}");
            return;
        }
    }

    // Fallback: plain decimal rendering.
    let _infallible = write!(buff, "{input}");
}

// Output buffer that every piece below appends to.
let mut short_representation = String::new();

// A zero constant denominator is not emitted at all; otherwise it comes first,
// marked with `C`.
let denominator = self.constant_denominator;
if denominator > 0 {
    short_representation.push('C');
    append_power_of_10_to_scientific(denominator, &mut short_representation);
}

// Each single unit appends its own short form, in stored order.
for single_unit in self.single_units.as_slice().iter() {
    single_unit.append_short_representation(&mut short_representation);
}

short_representation
}
}

#[cfg(test)]
mod tests {
    use icu::experimental::measure::parser::MeasureUnitParser;

    /// Parses each full unit identifier and checks the generated short representation.
    #[test]
    fn test_generate_short_representation() {
        // (full unit identifier, expected short representation)
        const TEST_CASES: &[(&str, &str)] = &[
            ("meter", "I85"),
            ("foot", "I50"),
            ("inch", "I66"),
            ("square-meter", "P2I85"),
            ("square-millimeter", "P2D-3I85"),
            ("micrometer", "D-6I85"),
            ("meter-per-second", "I85P-1I127"),
            ("liter-per-100-kilometer", "C100I82P-1D3I85"),
            ("portion-per-1e9", "C1E9I113"),
            ("per-10000000000-portion", "C1E10P-1I113"),
            ("liter-per-240000000000-kilometer", "C24E10I82P-1D3I85"),
            ("millimeter-per-square-microsecond", "D-3I85P-2D-6I127"),
        ];

        let parser = MeasureUnitParser::new();

        for &(full_unit, expected_short) in TEST_CASES {
            let short_representation = parser
                .try_from_str(full_unit)
                .unwrap()
                .generate_short_representation();
            assert_eq!(short_representation, expected_short, "{full_unit}");
        }
    }
}
14 changes: 14 additions & 0 deletions components/experimental/src/measure/provider/si_prefix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
//!
//! Read more about data providers: [`icu_provider`]

use alloc::string::String;
use core::fmt::Write;

/// Represents the base of an si prefix.
#[zerovec::make_ule(BaseULE)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
Expand Down Expand Up @@ -39,3 +42,14 @@ pub struct SiPrefix {
/// The base of the si prefix.
pub base: Base,
}

impl SiPrefix {
    /// Writes this prefix's short form onto the end of `buff`: a one-letter base
    /// marker (`D` for [`Base::Decimal`], `B` for [`Base::Binary`]) immediately
    /// followed by the prefix power.
    pub(crate) fn append_short_representation(&self, buff: &mut String) {
        let base_marker = match self.base {
            Base::Decimal => 'D',
            Base::Binary => 'B',
        };
        buff.push(base_marker);

        // Writing into a `String` cannot fail, so the result is deliberately dropped.
        let _infallible = write!(buff, "{}", self.power);
    }
}
24 changes: 24 additions & 0 deletions components/experimental/src/measure/provider/single_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::si_prefix::SiPrefix;
use alloc::string::String;
use core::fmt::Write;

/// Represents a single unit in a measure unit.
/// For example, the MeasureUnit `kilometer-per-square-second` contains two single units:
Expand All @@ -23,3 +25,25 @@ pub struct SingleUnit {
/// The id of the unit.
pub unit_id: u16,
}

impl SingleUnit {
/// Appends the short representation of the single unit to the given string.
///
/// The format of the short representation is as follows:
/// 1. If the power is not 1, the power is prefixed with "P" followed by the power value.
/// 2. If the si prefix power is not 0, the si prefix is represented by its base character ('D' for Decimal, 'B' for Binary) followed by the prefix power value.
/// 3. The unit ID is prefixed with "I" and appended to the string.
pub(crate) fn append_short_representation(&self, buff: &mut String) {
if self.power != 1 {
buff.push('P');
let _infallible = write!(buff, "{}", self.power);
}

if self.si_prefix.power != 0 {
self.si_prefix.append_short_representation(buff);
}

buff.push('I');
let _infallible = write!(buff, "{}", self.unit_id);
}
}