diff --git a/Cargo.lock b/Cargo.lock index bc885f3982e..5614933fe27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1642,6 +1642,17 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_message" +version = "0.1.0" +dependencies = [ + "criterion", + "iai", + "icu_locid", + "intl-memoizer", + "smallvec", +] + [[package]] name = "icu_normalizer" version = "0.6.0" @@ -1929,6 +1940,16 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "intl-memoizer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c310433e4a310918d6ed9243542a6b83ec1183df95dff8f23f87bb88a264a66f" +dependencies = [ + "type-map", + "unic-langid", +] + [[package]] name = "ipnet" version = "2.3.1" @@ -3359,6 +3380,12 @@ dependencies = [ "syn", ] +[[package]] +name = "tinystr" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1" + [[package]] name = "tinystr" version = "0.4.11" @@ -3532,6 +3559,15 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "type-map" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d3364c5e96cb2ad1603037ab253ddd34d7fb72a58bdddf4b7350760fc69a46" +dependencies = [ + "rustc-hash", +] + [[package]] name = "typenum" version = "1.15.0" @@ -3552,6 +3588,24 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" +[[package]] +name = "unic-langid" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d" +dependencies = [ + "tinystr 0.3.4", +] + [[package]] name = "unicode-bidi" version = "0.3.8" diff --git a/Cargo.toml b/Cargo.toml index 4155b5965e1..df460ce8a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ members = [ "utils/databake", "utils/databake/derive", "experimental/segmenter", + "experimental/message", "ffi/capi_cdylib", "ffi/diplomat", "ffi/capi_staticlib", diff --git a/experimental/message/Cargo.toml b/experimental/message/Cargo.toml new file mode 100644 index 00000000000..91cde4056d1 --- /dev/null +++ b/experimental/message/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "icu_message" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +smallvec = "1.6" +intl-memoizer = "0.5" +icu_locid = { path = "../../components/locid" } + +[dev-dependencies] +iai = "0.1" +criterion = "0.3.4" + +[[bench]] +name = "parser_iai" +harness = false + +[[bench]] +name = "parser" +harness = false + +[[bench]] +name = "mf" +harness = false diff --git a/experimental/message/benches/mf.rs b/experimental/message/benches/mf.rs new file mode 100644 index 00000000000..f6d5c2a2753 --- /dev/null +++ b/experimental/message/benches/mf.rs @@ -0,0 +1,76 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use icu_message::parser::Parser; +use icu_message::types::VariableType; +use icu_message::MessageFormat; +use std::collections::HashMap; + +fn overview_bench(c: &mut Criterion) { + let source = "{Hello World}"; + c.bench_function("message/format/simple/format_from_source", |b| { + let mf = MessageFormat::<&str>::new(); + b.iter(|| { + let _ = mf.format_from_source::<&str, &str>(black_box(source), None); + }) + }); + + c.bench_function("message/format/simple/format_to_string", |b| { + let mf = MessageFormat::<&str>::new(); + let parser = Parser::new(source); + let msg = parser.parse().unwrap(); + b.iter(|| { + let _ = mf.format_to_string::<&str, &str>(black_box(&msg), None); + }) + }); + + let source = "{Today is {$today}} a good day."; + let mut vars = HashMap::new(); + vars.insert("today".to_string(), VariableType::String("January 25 2022")); + + c.bench_function("message/format/placeholder/format_from_source", |b| { + let mf = MessageFormat::<&str>::new(); + b.iter(|| { + let _ = mf.format_from_source::<&str, _>(black_box(source), Some(&vars)); + }) + }); + + c.bench_function("message/format/placeholder/format_to_string", |b| { + let mf = MessageFormat::<&str>::new(); + let parser = Parser::new(source); + let msg = parser.parse().unwrap(); + b.iter(|| { + let _ = mf.format_to_string::<&str, &str>(black_box(&msg), Some(&vars)); + }) + }); +} + +fn compare_bench(c: &mut Criterion) { + let mut sources = vec![]; + for i in 0..99 { + let source = format!("{{Value {i}}}"); + sources.push(source); + } + + let messages: Vec<_> = sources + .iter() + .map(|s| { + let parser = Parser::new(s.as_str()); + parser.parse().unwrap() + }) + .collect(); + + c.bench_function("message/format/compare/simple", |b| { + let mf = MessageFormat::<&str>::new(); + b.iter(|| { + for msg in &messages { + let _ = mf.format_to_string::<_, &str>(black_box(msg), None); + } + }) + }); +} + +criterion_group!(benches, 
overview_bench, compare_bench); +criterion_main!(benches); diff --git a/experimental/message/benches/parser.rs b/experimental/message/benches/parser.rs new file mode 100644 index 00000000000..385da04251f --- /dev/null +++ b/experimental/message/benches/parser.rs @@ -0,0 +1,44 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use icu_message::parser::Parser; + +fn overview_bench(c: &mut Criterion) { + let source = "{Hello World}"; + c.bench_function("message/parse/simple", |b| { + b.iter(|| { + let parser = Parser::new(black_box(source)); + let _ = parser.parse(); + }) + }); + + let source = "{Today is {$today}} a good day."; + c.bench_function("message/parse/placeholder", |b| { + b.iter(|| { + let parser = Parser::new(black_box(source)); + let _ = parser.parse(); + }) + }); +} + +fn compare_bench(c: &mut Criterion) { + let mut messages = vec![]; + + for i in 0..99 { + messages.push(format!("{{Value {i}}}")); + } + + c.bench_function("message/parse/compare/simple", |b| { + b.iter(|| { + for msg in &messages { + let parser = Parser::new(black_box(msg.as_str())); + let _ = parser.parse(); + } + }) + }); +} + +criterion_group!(benches, overview_bench, compare_bench); +criterion_main!(benches); diff --git a/experimental/message/benches/parser_iai.rs b/experimental/message/benches/parser_iai.rs new file mode 100644 index 00000000000..830cd55245d --- /dev/null +++ b/experimental/message/benches/parser_iai.rs @@ -0,0 +1,13 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_message::parser::Parser; + +fn iai_parse_message() { + let source = "{Hello World}"; + let parser = Parser::new(source); + let _ = parser.parse(); +} + +iai::main!(iai_parse_message,); diff --git a/experimental/message/src/ast.rs b/experimental/message/src/ast.rs new file mode 100644 index 00000000000..b785d9aa032 --- /dev/null +++ b/experimental/message/src/ast.rs @@ -0,0 +1,98 @@ +use smallvec::SmallVec; + +#[derive(Debug, PartialEq)] +pub struct Message { + pub declarations: SmallVec<[Declaration; 1]>, + pub value: MessageValue, +} + +#[derive(Debug, PartialEq)] +pub struct Declaration { + pub variable: S, + pub expression: Expression, +} + +#[derive(Debug, PartialEq)] +pub enum MessageValue { + Pattern(Pattern), + Select(Box>), +} + +#[derive(Debug, PartialEq)] +pub struct Select { + pub selector: SmallVec<[Expression; 1]>, + pub variants: SmallVec<[Variant; 3]>, +} + +#[derive(Debug, PartialEq)] +pub struct Variant { + pub key: SmallVec<[VariantKey; 1]>, + pub pattern: Pattern, +} + +#[derive(Debug, PartialEq)] +pub struct Pattern { + pub body: SmallVec<[PatternElement; 3]>, +} + +#[derive(Debug, PartialEq)] +pub enum PatternElement { + Text(S), + Placeholder(Placeholder), +} + +#[derive(Debug, PartialEq)] +pub enum Placeholder { + Markup { + name: S, + options: SmallVec<[Option; 1]>, + }, + MarkupEnd { + name: S, + }, + Expression(Expression), +} + +#[derive(Debug, PartialEq)] +pub enum Expression { + Operand { + operand: Operand, + annotation: std::option::Option>, + }, + Annotation(Annotation), +} + +#[derive(Debug, PartialEq)] +pub enum Operand { + Literal(Literal), + Variable(S), +} + +#[derive(Debug, PartialEq)] +pub struct Annotation { + pub function: S, + pub options: SmallVec<[Option; 1]>, +} + +#[derive(Debug, PartialEq)] +pub struct Literal { + pub value: S, +} + +#[derive(Debug, PartialEq)] +pub enum VariantKey { + Literal(Literal), + Asterisk, +} + +#[derive(Debug, PartialEq)] +pub struct Option { + name: S, + value: OptionValue, +} + 
+#[derive(Debug, PartialEq)] +pub enum OptionValue { + Literal(Literal), + Variable(S), +} diff --git a/experimental/message/src/functions/mod.rs b/experimental/message/src/functions/mod.rs new file mode 100644 index 00000000000..07952f2a7de --- /dev/null +++ b/experimental/message/src/functions/mod.rs @@ -0,0 +1,9 @@ +use crate::types::VariableType; + +pub struct Number; + +impl Number { + pub fn format(input: &VariableType) -> VariableType { + VariableType::String("Hello from function".to_string()) + } +} diff --git a/experimental/message/src/lib.rs b/experimental/message/src/lib.rs new file mode 100644 index 00000000000..6f14596e7e2 --- /dev/null +++ b/experimental/message/src/lib.rs @@ -0,0 +1,242 @@ +pub mod ast; +pub mod functions; +pub mod parser; +pub mod resolver; +pub mod types; + +use icu_locid::Locale; +use intl_memoizer::IntlMemoizer; +use parser::{slice::Slice, Parser}; +use resolver::{Resolver, Scope}; +use std::borrow::Cow; +use std::collections::HashMap; +use types::{MessagePart, VariableType}; + +pub type MF2Function<'b> = + Box Fn(&VariableType<&'s str>, &MessageFormat) -> Vec> + 'b>; + +#[derive(Default)] +pub struct MessageFormat<'b> { + pub intls: IntlMemoizer, + pub functions: HashMap>, +} + +impl<'b> MessageFormat<'b> { + pub fn new(_locale: Locale) -> Self { + Self { + intls: IntlMemoizer::default(), + functions: HashMap::default(), + } + } + + pub fn format_to_string<'m, 'mv, 'varsv, 'varsm, 'mf, 'mpv, MV, VARSV>( + &'mf self, + msg: &'m ast::Message, + variables: Option<&'varsm HashMap>>, + ) -> Cow<'mpv, str> + where + MV: Slice<'mv>, + VARSV: Slice<'varsv>, + 'mv: 'mpv, + 'varsv: 'mpv, + 'varsm: 'varsv, + { + let scope = Scope::new(self, variables); + Resolver::<_, _, Cow>::resolve_to_string(msg, &scope) + } + + pub fn format_to_parts<'m, 'mv, 'varsv, 'varsm, 'mf, 'mpv, MV, VARSV, MPV>( + &self, + msg: &ast::Message, + variables: Option<&'varsm HashMap>>, + ) -> Vec> + where + MV: Slice<'mv>, + VARSV: Slice<'varsv>, + MPV: 'mpv + 
Slice<'mpv>, + 'mv: 'mpv, + 'varsv: 'mpv, + 'varsm: 'varsv, + { + let scope = Scope::new(self, variables); + Resolver::resolve_to_parts(msg, &scope) + } + + pub fn format_from_source<'m, 'mv, 'varsv, 'varsm, 'mf, 'mpv, MV, VARSV>( + &'mf self, + source: MV, + variables: Option<&'varsm HashMap>>, + ) -> Cow<'mpv, str> + where + MV: 'm + Slice<'mv>, + VARSV: Slice<'varsv>, + 'mv: 'mpv, + 'varsv: 'mpv, + 'varsm: 'varsv, + { + let parser = Parser::new(source); + let msg: ast::Message = parser.parse().unwrap(); + self.format_to_string(&msg, variables) + } +} + +#[cfg(test)] +mod test { + use super::parser::Parser; + use super::types::{MessagePart, VariableType}; + use super::MessageFormat; + use crate::ast; + use icu_locid::locale; + use std::borrow::Cow; + use std::collections::HashMap; + + #[test] + fn sanity_check() { + let mf = MessageFormat::new(locale!("und")); + + let result = mf.format_from_source::<_, &str>("{Hello World}", None); + assert_eq!(result, "Hello World"); + } + + #[test] + fn variable_check() { + let mf = MessageFormat::new(locale!("und")); + + let mut variables = HashMap::new(); + variables.insert("name".into(), VariableType::String("John")); + + let result = mf.format_from_source("{{$name}}", Some(&variables)); + assert_eq!(result, "John"); + } + + #[test] + fn function_check() { + let mut mf = MessageFormat::new(locale!("und")); + mf.functions.insert( + "number".to_string(), + Box::new( + |input: &VariableType<&str>, mf: &MessageFormat| -> Vec> { + match input { + VariableType::Number(n) => { + let result = format!("{n}"); + vec![MessagePart::Literal(result)] + } + _ => todo!(), + } + }, + ), + ); + + let mut variables: HashMap<_, VariableType<&str>> = HashMap::new(); + variables.insert("emailCount".into(), VariableType::Number(5.0)); + + let result = mf.format_from_source( + "{You have {$emailCount :number} unread emails.}", + Some(&variables), + ); + assert_eq!(result, "You have 5 unread emails."); + } + + #[test] + fn dynamic_msg_check() { + 
let mut messages = HashMap::new(); + + let parser = Parser::new("{Dragon}"); + let dragon_msg = parser.parse().unwrap(); + let parser = Parser::new("{Golem}"); + let golem_msg = parser.parse().unwrap(); + + messages.insert("creature-dragon".to_string(), &dragon_msg); + messages.insert("creature-golem".to_string(), &golem_msg); + + let msg_ref = &messages; + + let mut mf = MessageFormat::new(locale!("und")); + + let message_function = + |input: &VariableType<&str>, mf: &MessageFormat| -> Vec> { + let id: &str = match input { + VariableType::MessageReference(s) => *s, + _ => todo!(), + }; + let msg = msg_ref.get(id).unwrap(); + let result = mf.format_to_string::<_, &str>(msg, None); + vec![MessagePart::Literal(result.to_string())] + }; + + mf.functions + .insert("message".to_string(), Box::new(message_function)); + + let mut variables = HashMap::new(); + variables.insert( + "monster".into(), + VariableType::MessageReference("creature-dragon"), + ); + + let result = mf.format_from_source("{{$monster :message} killed you.}", Some(&variables)); + assert_eq!(result, "Dragon killed you."); + } + + #[test] + fn function_preserve_parts() { + let mut mf = MessageFormat::new(locale!("und")); + mf.functions.insert( + "emphasis".to_string(), + Box::new( + |input: &VariableType<&str>, mf: &MessageFormat| -> Vec> { + let v = match input { + VariableType::String(s) => s, + _ => todo!(), + }; + vec![ + MessagePart::Markup { + name: "strong".to_string(), + }, + MessagePart::Literal(v.to_string()), + MessagePart::MarkupEnd { + name: "strong".to_string(), + }, + ] + }, + ), + ); + + let mut variables = HashMap::new(); + variables.insert("userName".into(), VariableType::String("John")); + + let result = mf.format_from_source("{Hello {$userName :emphasis}.}", Some(&variables)); + assert_eq!(result, "Hello {+strong}John{-strong}."); + } + + #[test] + fn markup_passthrough_check() { + let mf = MessageFormat::new(locale!("en-US")); + + let mut variables = HashMap::new(); + 
variables.insert( + "input-markup".into(), + VariableType::List(vec![ + VariableType::Markup { name: "strong" }, + VariableType::String("Hello World!"), + VariableType::MarkupEnd { name: "strong" }, + ]), + ); + + let parser = Parser::new("{{$input-markup}}"); + let msg = parser.parse().unwrap(); + + let result = mf.format_to_parts(&msg, Some(&variables)); + assert_eq!( + result, + vec![ + MessagePart::Markup { + name: Cow::Borrowed("strong") + }, + MessagePart::Literal(Cow::Borrowed("Hello World!")), + MessagePart::MarkupEnd { + name: Cow::Borrowed("strong") + }, + ] + ); + } +} diff --git a/experimental/message/src/parser/macros.rs b/experimental/message/src/parser/macros.rs new file mode 100644 index 00000000000..d7c8d9f14b5 --- /dev/null +++ b/experimental/message/src/parser/macros.rs @@ -0,0 +1,11 @@ +// macro_rules! get_byte { +// ($s:expr, $idx:expr) => { +// $s.source.as_ref().as_bytes().get($idx) +// }; +// } + +macro_rules! get_current_byte { + ($s:expr) => { + $s.source.byte_at($s.ptr) + }; +} diff --git a/experimental/message/src/parser/mod.rs b/experimental/message/src/parser/mod.rs new file mode 100644 index 00000000000..1ee87f8becc --- /dev/null +++ b/experimental/message/src/parser/mod.rs @@ -0,0 +1,425 @@ +pub mod slice; +#[macro_use] +mod macros; + +use super::ast; +use slice::Slice; +use smallvec::SmallVec; + +#[derive(Debug, Clone, PartialEq)] +pub enum ParserError { + Unknown, +} + +impl std::fmt::Display for ParserError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Unknown => write!(f, "unknown"), + } + } +} + +impl std::error::Error for ParserError {} + +type ParserResult = Result; + +pub struct Parser { + source: S, + ptr: usize, +} + +impl<'s, S> Parser +where + S: Slice<'s>, +{ + #[inline] + fn next_if(&mut self, b: u8) -> bool { + let result = get_current_byte!(self) == Some(&b); + if result { + self.ptr += 1; + } + result + } + + #[inline] + fn next(&mut self) -> Option<&u8> { + let 
result = get_current_byte!(self); + self.ptr += 1; + result + } + + #[inline] + fn skip_ws(&mut self) { + while get_current_byte!(self) == Some(&b' ') { + self.ptr += 1; + } + } +} + +impl<'s, S> Parser +where + S: Slice<'s>, +{ + #[must_use] + pub const fn new(source: S) -> Self { + Self { source, ptr: 0 } + } + + pub fn parse(mut self) -> ParserResult> { + let mut declarations = SmallVec::new(); + + loop { + match self.next() { + Some(&b'l') => { + declarations.push(self.parse_declaration()?); + self.skip_ws(); + } + Some(b) => { + let value = match b { + b'{' => { + let pattern = self.parse_pattern()?; + ast::MessageValue::Pattern(pattern) + } + b'm' => { + let select = self.parse_select()?; + ast::MessageValue::Select(Box::new(select)) + } + _ => { + unreachable!(); + } + }; + return Ok(ast::Message { + declarations, + value, + }); + } + _ => { + unreachable!(); + } + } + } + } + + fn parse_declaration(&mut self) -> ParserResult> { + assert_eq!(self.next(), Some(&b'e')); + assert_eq!(self.next(), Some(&b't')); + self.skip_ws(); + + assert_eq!(self.next(), Some(&b'$')); + let variable = self.parse_name()?; + + self.skip_ws(); + assert_eq!(self.next(), Some(&b'=')); + self.skip_ws(); + assert_eq!(self.next(), Some(&b'{')); + let expression = self.parse_expression()?; + assert_eq!(self.next(), Some(&b'}')); + + Ok(ast::Declaration { + variable, + expression, + }) + } + + fn parse_select(&mut self) -> ParserResult> { + assert_eq!(self.next(), Some(&b'a')); + assert_eq!(self.next(), Some(&b't')); + assert_eq!(self.next(), Some(&b'c')); + assert_eq!(self.next(), Some(&b'h')); + let mut selector = SmallVec::new(); + let mut variants = SmallVec::new(); + + self.skip_ws(); + + while self.next_if(b'{') { + selector.push(self.parse_expression()?); + assert_eq!(self.next(), Some(&b'}')); + self.skip_ws(); + } + + while self.next_if(b'w') { + variants.push(self.parse_variant()?); + self.skip_ws(); + } + + Ok(ast::Select { selector, variants }) + } + + fn parse_variant(&mut 
self) -> ParserResult> { + assert_eq!(self.next(), Some(&b'h')); + assert_eq!(self.next(), Some(&b'e')); + assert_eq!(self.next(), Some(&b'n')); + let mut key = SmallVec::new(); + + self.skip_ws(); + + if self.next_if(b'*') { + key.push(ast::VariantKey::Asterisk); + } + + self.skip_ws(); + + assert_eq!(self.next(), Some(&b'{')); + + let pattern = self.parse_pattern()?; + + Ok(ast::Variant { key, pattern }) + } + + fn parse_pattern(&mut self) -> ParserResult> { + let mut start = self.ptr; + let mut body = SmallVec::new(); + while let Some(b) = self.next() { + match b { + b'}' => { + let end = self.ptr - 1; + if start != end { + body.push(ast::PatternElement::Text(self.source.slice(start..end))); + } + return Ok(ast::Pattern { body }); + } + b'{' => { + let end = self.ptr - 1; + if start != end { + body.push(ast::PatternElement::Text(self.source.slice(start..end))); + } + body.push(ast::PatternElement::Placeholder(self.parse_placeholder()?)); + start = self.ptr; + } + _ => {} + } + } + unreachable!() + } + + fn parse_placeholder(&mut self) -> ParserResult> { + let placeholder = match get_current_byte!(self) { + Some(b'+') => { + self.ptr += 1; + let name = self.parse_name()?; + let options = SmallVec::new(); + ast::Placeholder::Markup { name, options } + } + Some(b'-') => { + self.ptr += 1; + let name = self.parse_name()?; + ast::Placeholder::MarkupEnd { name } + } + Some(_) => { + let exp = self.parse_expression()?; + ast::Placeholder::Expression(exp) + } + None => { + unreachable!(); + } + }; + assert_eq!(self.next(), Some(&b'}')); + Ok(placeholder) + } + + fn parse_expression(&mut self) -> ParserResult> { + let operand = self.parse_operand()?; + let annotation = if self.next_if(b' ') { + Some(self.parse_annotation()?) 
+ } else { + None + }; + Ok(ast::Expression::Operand { + operand, + annotation, + }) + } + + fn parse_operand(&mut self) -> ParserResult> { + let op = match self.next() { + Some(b'$') => ast::Operand::Variable(self.parse_name()?), + Some(b'(') => ast::Operand::Literal(self.parse_literal()?), + _ => { + unreachable!() + } + }; + Ok(op) + } + + fn parse_annotation(&mut self) -> ParserResult> { + assert_eq!(self.next(), Some(&b':')); + let name = self.parse_name()?; + Ok(ast::Annotation { + function: name, + options: SmallVec::new(), + }) + } + + fn parse_name(&mut self) -> ParserResult { + let start = self.ptr; + if let Some(ch) = self.next() { + assert!(ch.is_ascii_alphabetic()); + } else { + unreachable!(); + } + + while let Some(b) = get_current_byte!(self) { + if b.is_ascii_alphabetic() || *b == b'-' { + self.ptr += 1; + } else { + break; + } + } + if start == self.ptr { + unreachable!(); + } else { + Ok(self.source.slice(start..self.ptr)) + } + } + + fn parse_literal(&mut self) -> ParserResult> { + let start = self.ptr; + while let Some(b) = self.next() { + if b == &b')' { + break; + } + } + + if start == self.ptr - 1 { + unreachable!(); + } else { + Ok(ast::Literal { + value: self.source.slice(start..self.ptr - 1), + }) + } + } +} + +#[cfg(test)] +mod tests { + use super::ast; + use super::Parser; + use smallvec::SmallVec; + + #[test] + fn test_message() { + let source = "{Hello World}"; + let parser = Parser::new(source); + + let ast = parser.parse(); + assert_eq!( + ast, + Ok(ast::Message { + declarations: SmallVec::new(), + value: ast::MessageValue::Pattern(ast::Pattern { + body: SmallVec::from_vec(vec![ast::PatternElement::Text("Hello World")]) + }) + }) + ); + } + + #[test] + fn test_placeholder() { + let source = "{Today is {$today} a good day.}"; + let parser = Parser::new(source); + + let ast = parser.parse(); + assert_eq!( + ast, + Ok(ast::Message { + declarations: SmallVec::new(), + value: ast::MessageValue::Pattern(ast::Pattern { + body: 
SmallVec::from_vec(vec![ + ast::PatternElement::Text("Today is "), + ast::PatternElement::Placeholder(ast::Placeholder::Expression( + ast::Expression::Operand { + operand: ast::Operand::Variable("today"), + annotation: None, + } + )), + ast::PatternElement::Text(" a good day."), + ]) + }) + }) + ); + } + + #[test] + fn test_literal() { + let source = "{Today is {(This is a Literal)}}"; + let parser = Parser::new(source); + + let ast = parser.parse(); + assert_eq!( + ast, + Ok(ast::Message { + declarations: SmallVec::new(), + value: ast::MessageValue::Pattern(ast::Pattern { + body: SmallVec::from_vec(vec![ + ast::PatternElement::Text("Today is "), + ast::PatternElement::Placeholder(ast::Placeholder::Expression( + ast::Expression::Operand { + operand: ast::Operand::Literal(ast::Literal { + value: "This is a Literal" + }), + annotation: None, + } + )), + ]) + }) + }) + ); + } + + #[test] + fn test_select() { + let source = "match {$var} when * {Zero}"; + let parser = Parser::new(source); + + let ast = parser.parse(); + assert_eq!( + ast, + Ok(ast::Message { + declarations: SmallVec::new(), + value: ast::MessageValue::Select(Box::new(ast::Select { + selector: SmallVec::from_vec(vec![ast::Expression::Operand { + operand: ast::Operand::Variable("var"), + annotation: None, + }]), + variants: SmallVec::from_vec(vec![ast::Variant { + key: SmallVec::from_vec(vec![ast::VariantKey::Asterisk,]), + pattern: ast::Pattern { + body: SmallVec::from_vec(vec![ast::PatternElement::Text("Zero"),]), + } + }]), + })) + }) + ); + } + + #[test] + fn test_declarations() { + let source = "let $foo = {$bar} {Welcome to {$foo}}"; + let parser = Parser::new(source); + + let ast = parser.parse(); + assert_eq!( + ast, + Ok(ast::Message { + declarations: SmallVec::from_vec(vec![ast::Declaration { + variable: "foo", + expression: ast::Expression::Operand { + operand: ast::Operand::Variable("bar"), + annotation: None, + }, + },]), + value: ast::MessageValue::Pattern(ast::Pattern { + body: 
SmallVec::from_vec(vec![ + ast::PatternElement::Text("Welcome to "), + ast::PatternElement::Placeholder(ast::Placeholder::Expression( + ast::Expression::Operand { + operand: ast::Operand::Variable("foo"), + annotation: None, + }, + )), + ]) + }) + }) + ); + } +} diff --git a/experimental/message/src/parser/slice.rs b/experimental/message/src/parser/slice.rs new file mode 100644 index 00000000000..93b15a7a2cc --- /dev/null +++ b/experimental/message/src/parser/slice.rs @@ -0,0 +1,130 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::ops::Range; + +pub trait Slice<'s>: Hash + PartialEq { + fn from_slice<'m, S: Slice<'m>>(input: &S) -> Self + where + 'm: 's; + fn from_cow(input: Cow<'s, str>) -> Self; + fn slice(&self, range: Range) -> Self; + fn byte_at(&self, ptr: usize) -> Option<&u8>; + fn as_str(&self) -> &str; + fn as_cow(&self) -> Cow<'s, str>; + fn into_cow(self) -> Cow<'s, str>; +} + +impl<'s> Slice<'s> for String { + fn from_cow(input: Cow<'s, str>) -> Self { + match input { + Cow::Borrowed(b) => b.to_string(), + Cow::Owned(o) => o, + } + } + + fn as_cow(&self) -> Cow<'s, str> { + Cow::Owned(self.clone()) + } + + fn from_slice<'m, S: Slice<'m>>(input: &S) -> Self + where + 'm: 's, + { + Self::from_cow(input.as_cow()) + } + + fn slice(&self, range: Range) -> Self { + self[range].to_string() + } + + fn byte_at(&self, ptr: usize) -> Option<&u8> { + self.as_bytes().get(ptr) + } + + fn as_str(&self) -> &str { + self.as_str() + } + + fn into_cow(self) -> Cow<'s, str> { + Cow::Owned(self) + } +} + +impl<'s> Slice<'s> for &'s str { + fn from_cow(input: Cow<'s, str>) -> Self { + match input { + Cow::Borrowed(b) => b, + Cow::Owned(_) => { + unimplemented!() + } + } + } + + fn as_cow(&self) -> Cow<'s, str> { + Cow::Borrowed(self) + } + + fn from_slice<'m, S: Slice<'m>>(input: &S) -> Self + where + 'm: 's, + { + Self::from_cow(input.as_cow()) + } + + #[inline] + fn slice(&self, range: Range) -> Self { + &self[range] + } + + #[inline] + fn byte_at(&self, ptr: 
usize) -> Option<&u8> { + self.as_bytes().get(ptr) + } + + fn as_str(&self) -> &str { + self + } + + fn into_cow(self) -> Cow<'s, str> { + Cow::Borrowed(self) + } +} + +impl<'s> Slice<'s> for Cow<'s, str> { + fn from_cow(input: Cow<'s, str>) -> Self { + input + } + + fn as_cow(&self) -> Cow<'s, str> { + self.clone() + } + + fn from_slice<'m, S: Slice<'m>>(input: &S) -> Self + where + 'm: 's, + { + Self::from_cow(input.as_cow()) + } + + #[inline] + fn slice(&self, range: Range) -> Self { + // Borrowed input stays borrowed (no allocation); owned input copies the sub-range. Panics on an invalid range, like the &str and String impls. + match self { + Cow::Borrowed(s) => Cow::Borrowed(&s[range]), + Cow::Owned(s) => Cow::Owned(s[range].to_owned()), + } + } + + #[inline] + fn byte_at(&self, ptr: usize) -> Option<&u8> { + self.as_bytes().get(ptr) + } + + fn as_str(&self) -> &str { + self.as_ref() + } + + fn into_cow(self) -> Cow<'s, str> { + self + } +} diff --git a/experimental/message/src/resolver/collector.rs b/experimental/message/src/resolver/collector.rs new file mode 100644 index 00000000000..874781c68d9 --- /dev/null +++ b/experimental/message/src/resolver/collector.rs @@ -0,0 +1,78 @@ +use crate::{parser::slice::Slice, types::MessagePart}; +use std::borrow::Cow; + +// MPV - message part value type +pub trait MessagePartCollector { + fn push_part(&mut self, part: MessagePart); +} + +pub struct MessagePartsList(pub Vec>); +pub struct MessageString<'s>(pub Cow<'s, str>); +pub struct MessageSink(W); + +impl MessagePartsList { + pub fn new() -> Self { + Self(vec![]) + } +} + +impl<'s> MessageString<'s> { + pub fn new() -> Self { + Self("".into()) + } +} + +impl MessageSink { + pub fn new(sink: W) -> Self { + Self(sink) + } +} + +impl MessagePartCollector for MessagePartsList { + fn push_part(&mut self, part: MessagePart) { + self.0.push(part); + } +} + +impl<'s, MPV: Slice<'s>> MessagePartCollector for MessageString<'s> { + fn push_part(&mut self, part: MessagePart) { + let new_part = match part { + MessagePart::Literal(l) => l.into_cow(), + MessagePart::Markup { name } => { + let name = name.as_str(); + let
result = format!("{{+{name}}}"); + result.into_cow() + } + MessagePart::MarkupEnd { name } => { + let name = name.as_str(); + let result = format!("{{-{name}}}"); + result.into_cow() + } + }; + if !new_part.is_empty() { + if self.0.is_empty() { + self.0 = new_part; + } else { + self.0.to_mut().push_str(&new_part); + } + } + } +} + +impl<'s, MPV: 's + Slice<'s>, W: std::fmt::Write> MessagePartCollector for MessageSink { + fn push_part(&mut self, part: MessagePart) { + match part { + MessagePart::Literal(l) => self.0.write_str(l.as_str()).unwrap(), + MessagePart::Markup { name } => { + self.0.write_str("{+").unwrap(); // write_str emits text verbatim, so braces are not doubled as in format! + self.0.write_str(name.as_str()).unwrap(); + self.0.write_str("}").unwrap(); + } + MessagePart::MarkupEnd { name } => { + self.0.write_str("{-").unwrap(); // same `{-name}` form that MessageString produces + self.0.write_str(name.as_str()).unwrap(); + self.0.write_str("}").unwrap(); + } + } + } +} diff --git a/experimental/message/src/resolver/mod.rs b/experimental/message/src/resolver/mod.rs new file mode 100644 index 00000000000..9fbef6c41bf --- /dev/null +++ b/experimental/message/src/resolver/mod.rs @@ -0,0 +1,361 @@ +mod collector; +mod scope; + +use collector::*; +pub use scope::Scope; + +use super::ast; +use super::parser::slice::Slice; +use super::types::{MessagePart, VariableType}; +use crate::MF2Function; +use std::borrow::Cow; + +// MV - message value type +// VARSV - variables value type +// MSGSV - messages value type +// MPV - message parts value type +pub struct Resolver { + p1: std::marker::PhantomData, + p2: std::marker::PhantomData, + p4: std::marker::PhantomData, +} + +// 'm - message lifetime +// 'mv - message value lifetime +// 'varsm - variables map lifetime +// 'varsv - variables values lifetime +// 'msgsm - messages map lifetime +// 'msgsmv - messages map value lifetime +// 'msgsv - messages value lifetime +// 'scope - scope lifetime +// 'mpv - message parts value lifetime +impl<'b, 'm, 'mv, 'varsm, 'varsv, 'mf, 'scope, 'mpv, MV, VARSV, MPV> Resolver +where + MV: Slice<'mv>,
+ VARSV: Slice<'varsv>, + MPV: 'mpv + Slice<'mpv>, + 'mv: 'mpv, + 'varsv: 'mpv, + 'varsm: 'varsv, +{ + pub fn resolve_to_parts( + msg: &'m ast::Message, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + ) -> Vec> { + let mut collector = MessagePartsList::new(); + Self::resolve_message_to_collector(msg, scope, &mut collector); + collector.0 + } + + pub fn resolve_to_string( + msg: &'m ast::Message, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + ) -> Cow<'mpv, str> { + let mut collector = MessageString::new(); + Self::resolve_message_to_collector(msg, scope, &mut collector); + collector.0 + } + + pub fn resolve_to_sink( + msg: &'m ast::Message, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + sink: W, + ) { + let mut collector = MessageSink::new(sink); + Self::resolve_message_to_collector(msg, scope, &mut collector); + } + + fn resolve_message_to_collector( + msg: &'m ast::Message, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + collector: &mut C, + ) where + C: MessagePartCollector, + { + let value = &msg.value; + let pattern = match value { + ast::MessageValue::Pattern(pattern) => pattern, + ast::MessageValue::Select(_) => todo!(), + }; + for pe in &pattern.body { + Self::resolve_pattern_element(pe, scope, collector); + } + } + + fn resolve_pattern_element( + pe: &'m ast::PatternElement, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + collector: &mut C, + ) where + C: MessagePartCollector, + { + match pe { + ast::PatternElement::Text(s) => { + collector.push_part(MessagePart::Literal(MPV::from_slice(s))) + } + ast::PatternElement::Placeholder(p) => Self::resolve_placeholder(p, scope, collector), + } + } + + fn resolve_placeholder( + placeholder: &'m ast::Placeholder, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + collector: &mut C, + ) where + C: MessagePartCollector, + { + match placeholder { + ast::Placeholder::Markup { name, options } => todo!(), + ast::Placeholder::MarkupEnd { name } => todo!(), + ast::Placeholder::Expression(e) => 
Self::resolve_expression(e, scope, collector), + } + } + + fn resolve_expression( + exp: &'m ast::Expression, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + collector: &mut C, + ) where + C: MessagePartCollector, + { + match exp { + ast::Expression::Operand { + operand, + annotation, + } => match operand { + ast::Operand::Literal(l) => { + collector.push_part(MessagePart::Literal(MPV::from_slice(&l.value))) + } + ast::Operand::Variable(v) => { + let var = Self::get_variable(v, scope).unwrap(); + if let Some(annotation) = annotation { + let func = Self::get_function(&annotation.function, scope).unwrap(); + let v: VariableType<&'varsv str> = var.as_ref(); + let result = func(&v, scope.mf); + for item in result { + match item { + MessagePart::Literal(s) => { + let s: Cow = Cow::Owned(s); + collector.push_part(MessagePart::Literal(MPV::from_cow(s))) + } + MessagePart::Markup { name } => { + collector.push_part(MessagePart::Markup { + name: MPV::from_slice(&name.to_owned()), + }); + } + MessagePart::MarkupEnd { name } => { + collector.push_part(MessagePart::MarkupEnd { + name: MPV::from_slice(&name.to_owned()), + }); + } + } + } + } else { + Self::resolve_variable(var, scope, collector); + } + } + }, + ast::Expression::Annotation(_) => todo!(), + } + } + + fn get_variable( + variable: &'m MV, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + ) -> Option<&'varsm VariableType> { + scope.variables.and_then(|vars| vars.get(variable.as_str())) + } + + fn get_function( + function: &'m MV, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + ) -> Option<&'mf MF2Function<'b>> { + scope.mf.functions.get(function.as_str()) + } + + fn resolve_variable( + var: &VariableType, + scope: &'scope Scope<'b, 'mf, 'varsm, VARSV>, + collector: &mut C, + ) where + C: MessagePartCollector, + V: Slice<'varsv>, + { + match var { + VariableType::String(s) => { + collector.push_part(MessagePart::Literal(MPV::from_slice(s.to_owned()))) + } + VariableType::Number(n) => { + let result = 
format!("{n}"); + collector.push_part(MessagePart::Literal(MPV::from_slice(&result))) + } + VariableType::MessageReference(id) => { + // if let Some(messages) = scope.messages { + // if let Some(msg) = messages.get(id.as_str()) { + // Resolver::resolve_message_to_collector(*msg, scope, collector); + // } else { + // todo!() + // } + // } else { + // todo!() + // } + } + VariableType::List(v) => { + for item in v { + Self::resolve_variable(item, scope, collector); + } + } + VariableType::Markup { name } => { + collector.push_part(MessagePart::Markup { + name: MPV::from_slice(name.to_owned()), + }); + } + VariableType::MarkupEnd { name } => { + collector.push_part(MessagePart::MarkupEnd { + name: MPV::from_slice(name.to_owned()), + }); + } + _ => todo!(), + } + } +} + +#[cfg(test)] +mod test { + use super::super::parser::Parser; + use super::super::types::{MessagePart, VariableType}; + use super::ast; + use super::{Resolver, Scope}; + use crate::MessageFormat; + use icu_locid::locale; + use smallvec::SmallVec; + use std::borrow::Cow; + use std::collections::HashMap; + + #[test] + fn sanity_check() { + let mf = MessageFormat::new(locale!("en-US")); + let source = "{Hello World}"; + let parser = Parser::new(source); + let msg = parser.parse().unwrap(); + + let mut variables = HashMap::new(); + variables.insert("name".into(), VariableType::String("John")); + let scope = Scope::new(&mf, Some(&variables)); + let string = Resolver::<_, _, &str>::resolve_to_string(&msg, &scope); + + assert_eq!(string, "Hello World"); + } + + #[test] + fn stay_borrowed_check() { + let mf = MessageFormat::new(locale!("en-US")); + + let msg = ast::Message { + declarations: Default::default(), + value: ast::MessageValue::Pattern(ast::Pattern { + body: SmallVec::from_vec(vec![ast::PatternElement::Text("Hello World")]), + }), + }; + + let scope = Scope::new(&mf, None); + let string = Resolver::<_, &str, &str>::resolve_to_string(&msg, &scope); + + assert!(matches!(string, Cow::Borrowed("Hello 
World"))); + + let scope = Scope::<&str>::new(&mf, None); + let parts = Resolver::<_, _, &str>::resolve_to_parts(&msg, &scope); + + assert_eq!(parts, vec![MessagePart::Literal("Hello World"),]); + + let mut sink = String::new(); + let scope = Scope::<&str>::new(&mf, None); + Resolver::<_, _, &str>::resolve_to_sink(&msg, &scope, &mut sink); + + assert_eq!(sink, "Hello World"); + } + + // #[test] + // fn lifetimes_check() { + // let mf = MessageFormat::new(); + // + // let parser = Parser::new("{Hello World{$name}{$creature}}"); + // let msg = parser.parse().unwrap(); + // // let parser = Parser::new("{Dragon}"); + // // let creature_msg = parser.parse().unwrap(); + // // let mut msgs = HashMap::new(); + // // msgs.insert("dragon".to_string(), &creature_msg); + // + // let mut variables = HashMap::new(); + // variables.insert("name".into(), VariableType::String("John")); + // variables.insert("creature".into(), VariableType::MessageReference("dragon")); + // let scope = Scope::new(&mf, Some(&variables)); + // let parts = Resolver::resolve_to_parts(&msg, &scope); + // + // assert_eq!( + // parts, + // vec![ + // MessagePart::Literal("Hello World"), + // MessagePart::Literal("John"), + // MessagePart::Literal("Dragon"), + // ] + // ); + // + // let parser = Parser::new("{{$name}}"); + // let msg = parser.parse().unwrap(); + // let string = Resolver::<_, _, &str>::resolve_to_string(&msg, &scope); + // assert!(matches!(string, Cow::Borrowed("John"))); + // + // let parser = Parser::new("{{$creature}}"); + // let msg = parser.parse().unwrap(); + // let string = Resolver::<_, _, &str>::resolve_to_string(&msg, &scope); + // assert!(matches!(string, Cow::Borrowed("Dragon"))); + // } + + #[test] + fn allocate_check() { + let mf = MessageFormat::new(locale!("en-US")); + + let msg = ast::Message { + declarations: Default::default(), + value: ast::MessageValue::Pattern(ast::Pattern { + body: SmallVec::from_vec(vec![ + ast::PatternElement::Text("Hello "), + 
ast::PatternElement::Text("World"), + ]), + }), + }; + + let scope = Scope::<&str>::new(&mf, None); + let string = Resolver::<_, _, &str>::resolve_to_string(&msg, &scope); + + assert_eq!(string, Cow::::Owned(String::from("Hello World"))); + + let scope = Scope::<&str>::new(&mf, None); + let parts = Resolver::resolve_to_parts(&msg, &scope); + + assert_eq!( + parts, + vec![ + MessagePart::Literal("Hello "), + MessagePart::Literal("World"), + ] + ); + } + + #[test] + fn variable_check() { + let mf = MessageFormat::new(locale!("en-US")); + + let source = "{{$name}}"; + let parser = Parser::new(source); + let msg = parser.parse().unwrap(); + + let mut variables = HashMap::new(); + variables.insert("name".into(), VariableType::String("John")); + let scope = Scope::new(&mf, Some(&variables)); + let string = Resolver::<_, _, &str>::resolve_to_string(&msg, &scope); + + assert_eq!(string, "John"); + } +} diff --git a/experimental/message/src/resolver/scope.rs b/experimental/message/src/resolver/scope.rs new file mode 100644 index 00000000000..66545cdecbd --- /dev/null +++ b/experimental/message/src/resolver/scope.rs @@ -0,0 +1,22 @@ +use crate::MessageFormat; +use crate::{ast, types::VariableType}; +use std::collections::HashMap; + +// 'vars - lifetime of variables map +// 'msgs - lifetime of messages map +// 'msgsv - lifetime of message values +// VARSV - variable value type +// MSGSV - messages value type +pub struct Scope<'b, 'mf, 'vars, VARSV> { + pub mf: &'mf MessageFormat<'b>, + pub variables: Option<&'vars HashMap>>, +} + +impl<'b, 'mf, 'vars, VARSV> Scope<'b, 'mf, 'vars, VARSV> { + pub fn new( + mf: &'mf MessageFormat<'b>, + variables: Option<&'vars HashMap>>, + ) -> Self { + Self { mf, variables } + } +} diff --git a/experimental/message/src/types.rs b/experimental/message/src/types.rs new file mode 100644 index 00000000000..baa013952b9 --- /dev/null +++ b/experimental/message/src/types.rs @@ -0,0 +1,40 @@ +use super::parser::slice::Slice; + +#[derive(Debug)] +pub 
enum VariableType { + String(S), + Number(f64), + MessageReference(S), + Markup { name: S }, + MarkupEnd { name: S }, + List(Vec>), + // Custom, // Passing a date +} + +impl<'a: 's, 's, S> VariableType +where + S: Slice<'s>, +{ + pub fn as_ref(&'a self) -> VariableType<&'s str> { + match self { + VariableType::String(s) => VariableType::String(s.as_str()), + VariableType::Number(n) => VariableType::Number(*n), + VariableType::MessageReference(s) => VariableType::MessageReference(s.as_str()), + VariableType::Markup { name } => VariableType::Markup { + name: name.as_str(), + }, + VariableType::MarkupEnd { name } => VariableType::MarkupEnd { + name: name.as_str(), + }, + VariableType::List(_) => todo!(), + } + } +} + +#[derive(Debug, PartialEq)] +pub enum MessagePart { + Literal(S), + Markup { name: S }, + MarkupEnd { name: S }, + // Custom +}