Skip to content

Commit 6e5df8f

Browse files
AaronO and littledivy authored
perf(idna): fast-path simple/ascii domains (#761)
* perf(idna): fast-path simple/ascii domains
* ci

Co-authored-by: Divy Srivastava <[email protected]>
1 parent 1d307ae commit 6e5df8f

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

idna/src/uts46.rs

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -318,50 +318,48 @@ fn check_validity(label: &str, config: Config, errors: &mut Errors) {
318318
// V8: Bidi rules are checked inside `processing()`
319319
}
320320

321-
/// http://www.unicode.org/reports/tr46/#Processing
322-
fn processing(
323-
domain: &str,
324-
config: Config,
325-
normalized: &mut String,
326-
output: &mut String,
327-
) -> Errors {
328-
// Weed out the simple cases: only allow all lowercase ASCII characters and digits where none
329-
// of the labels start with PUNYCODE_PREFIX and labels don't start or end with hyphen.
330-
let (mut prev, mut simple, mut puny_prefix) = ('?', !domain.is_empty(), 0);
321+
// Detect simple cases: all lowercase ASCII characters and digits where none
322+
// of the labels start with PUNYCODE_PREFIX and labels don't start or end with hyphen.
323+
fn is_simple(domain: &str) -> bool {
324+
if domain.is_empty() {
325+
return false;
326+
}
327+
let (mut prev, mut puny_prefix) = ('?', 0);
331328
for c in domain.chars() {
332329
if c == '.' {
333330
if prev == '-' {
334-
simple = false;
335-
break;
331+
return false;
336332
}
337333
puny_prefix = 0;
338334
continue;
339335
} else if puny_prefix == 0 && c == '-' {
340-
simple = false;
341-
break;
336+
return false;
342337
} else if puny_prefix < 5 {
343338
if c == ['x', 'n', '-', '-'][puny_prefix] {
344339
puny_prefix += 1;
345340
if puny_prefix == 4 {
346-
simple = false;
347-
break;
341+
return false;
348342
}
349343
} else {
350344
puny_prefix = 5;
351345
}
352346
}
353347
if !c.is_ascii_lowercase() && !c.is_ascii_digit() {
354-
simple = false;
355-
break;
348+
return false;
356349
}
357350
prev = c;
358351
}
359352

360-
if simple {
361-
output.push_str(domain);
362-
return Errors::default();
363-
}
353+
true
354+
}
364355

356+
/// http://www.unicode.org/reports/tr46/#Processing
357+
fn processing(
358+
domain: &str,
359+
config: Config,
360+
normalized: &mut String,
361+
output: &mut String,
362+
) -> Errors {
365363
normalized.clear();
366364
let mut errors = Errors::default();
367365
let offset = output.len();
@@ -447,11 +445,13 @@ impl Idna {
447445
}
448446
}
449447

450-
/// http://www.unicode.org/reports/tr46/#ToASCII
451-
#[allow(clippy::wrong_self_convention)]
452-
pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
453-
let mut errors = processing(domain, self.config, &mut self.normalized, &mut self.output);
454-
448+
pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors {
449+
if is_simple(domain) {
450+
out.push_str(domain);
451+
return Errors::default();
452+
}
453+
let mut errors = processing(domain, self.config, &mut self.normalized, out);
454+
self.output = std::mem::replace(out, String::with_capacity(out.len()));
455455
let mut first = true;
456456
for label in self.output.split('.') {
457457
if !first {
@@ -470,6 +470,13 @@ impl Idna {
470470
}
471471
}
472472
}
473+
errors
474+
}
475+
476+
/// http://www.unicode.org/reports/tr46/#ToASCII
477+
#[allow(clippy::wrong_self_convention)]
478+
pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
479+
let mut errors = self.to_ascii_inner(domain, out);
473480

474481
if self.config.verify_dns_length {
475482
let domain = if out.ends_with('.') {
@@ -491,6 +498,10 @@ impl Idna {
491498
/// http://www.unicode.org/reports/tr46/#ToUnicode
492499
#[allow(clippy::wrong_self_convention)]
493500
pub fn to_unicode<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
501+
if is_simple(domain) {
502+
out.push_str(domain);
503+
return Errors::default().into();
504+
}
494505
processing(domain, self.config, &mut self.normalized, out).into()
495506
}
496507
}

0 commit comments

Comments
 (0)