@@ -23,7 +23,7 @@ pub(crate) enum Token {
23
23
ProcessingInstructionStart ,
24
24
/// `?>`
25
25
ProcessingInstructionEnd ,
26
- /// `<!DOCTYPE
26
+ /// `<!DOCTYPE…`
27
27
DoctypeStart ,
28
28
/// `<`
29
29
OpeningTagStart ,
@@ -80,7 +80,10 @@ impl fmt::Display for Token {
80
80
Token :: SingleQuote => "'" ,
81
81
Token :: DoubleQuote => "\" " ,
82
82
Token :: MarkupDeclarationStart => "<!" ,
83
- _ => unreachable ! ( )
83
+ Token :: Character ( _) => {
84
+ debug_assert ! ( false ) ;
85
+ ""
86
+ } ,
84
87
} . fmt ( f) ,
85
88
}
86
89
}
@@ -349,14 +352,15 @@ impl Lexer {
349
352
Ok ( Some ( Token :: Character ( ']' ) ) ) ,
350
353
State :: InvalidCDataClosing ( ClosingSubstate :: Second ) => {
351
354
self . eof_handled = false ;
352
- Ok ( self . move_to_with_unread ( State :: Normal , & [ ']' ] , Token :: Character ( ']' ) ) )
355
+ Ok ( Some ( self . move_to_with_unread ( State :: Normal , & [ ']' ] , Token :: Character ( ']' ) ) ) )
353
356
} ,
354
357
State :: Normal =>
355
358
Ok ( None ) ,
356
359
}
357
360
}
358
361
359
362
#[ cold]
363
+ #[ allow( clippy:: needless_pass_by_value) ]
360
364
fn error ( & self , e : SyntaxError ) -> Error {
361
365
Error {
362
366
pos : self . position ( ) ,
@@ -370,21 +374,21 @@ impl Lexer {
370
374
match self . st {
371
375
State :: Normal => Ok ( self . normal ( c) ) ,
372
376
State :: TagStarted => self . tag_opened ( c) ,
373
- State :: EmptyTagClosing => Ok ( self . empty_element_closing ( c) ) ,
377
+ State :: EmptyTagClosing => Ok ( Some ( self . empty_element_closing ( c) ) ) ,
374
378
State :: CommentOrCDataOrDoctypeStarted => self . comment_or_cdata_or_doctype_started ( c) ,
375
379
State :: InsideCdata => Ok ( self . inside_cdata ( c) ) ,
376
380
State :: CDataStarted ( s) => self . cdata_started ( c, s) ,
377
381
State :: InsideComment => Ok ( self . inside_comment_state ( c) ) ,
378
382
State :: CommentStarted => self . comment_started ( c) ,
379
383
State :: InsideProcessingInstruction => Ok ( self . inside_processing_instruction ( c) ) ,
380
- State :: ProcessingInstructionClosing => Ok ( self . processing_instruction_closing ( c) ) ,
384
+ State :: ProcessingInstructionClosing => Ok ( Some ( self . processing_instruction_closing ( c) ) ) ,
381
385
State :: CommentClosing ( s) => self . comment_closing ( c, s) ,
382
386
State :: CDataClosing ( s) => Ok ( self . cdata_closing ( c, s) ) ,
383
387
State :: InsideDoctype => Ok ( self . inside_doctype ( c) ) ,
384
388
State :: DoctypeStarted ( s) => self . doctype_started ( c, s) ,
385
389
State :: InvalidCDataClosing ( s) => Ok ( self . invalid_cdata_closing ( c, s) ) ,
386
390
State :: InsideMarkupDeclaration => self . markup_declaration ( c) ,
387
- State :: InsideMarkupDeclarationQuotedString ( q) => Ok ( self . markup_declaration_string ( c, q) ) ,
391
+ State :: InsideMarkupDeclarationQuotedString ( q) => Ok ( Some ( self . markup_declaration_string ( c, q) ) ) ,
388
392
}
389
393
}
390
394
@@ -395,19 +399,19 @@ impl Lexer {
395
399
}
396
400
397
401
#[ inline]
398
- fn move_to_with ( & mut self , st : State , token : Token ) -> Option < Token > {
402
+ fn move_to_with ( & mut self , st : State , token : Token ) -> Token {
399
403
self . st = st;
400
- Some ( token)
404
+ token
401
405
}
402
406
403
407
#[ inline]
404
- fn move_to_and_reset_normal ( & mut self , st : State , token : Token ) -> Option < Token > {
408
+ fn move_to_and_reset_normal ( & mut self , st : State , token : Token ) -> Token {
405
409
self . normal_state = st;
406
410
self . st = st;
407
- Some ( token)
411
+ token
408
412
}
409
413
410
- fn move_to_with_unread ( & mut self , st : State , cs : & [ char ] , token : Token ) -> Option < Token > {
414
+ fn move_to_with_unread ( & mut self , st : State , cs : & [ char ] , token : Token ) -> Token {
411
415
for c in cs. iter ( ) . rev ( ) . copied ( ) {
412
416
self . char_queue . push_front ( c) ;
413
417
}
@@ -442,7 +446,7 @@ impl Lexer {
442
446
let first = chars. next ( ) . unwrap_or ( '\0' ) ;
443
447
self . char_queue . extend ( chars) ;
444
448
self . char_queue . push_back ( c) ;
445
- return Ok ( self . move_to_with ( State :: Normal , Token :: Character ( first) ) ) ;
449
+ return Ok ( Some ( self . move_to_with ( State :: Normal , Token :: Character ( first) ) ) ) ;
446
450
}
447
451
Err ( self . error ( SyntaxError :: UnexpectedTokenBefore ( chunk, c) ) )
448
452
}
@@ -496,11 +500,11 @@ impl Lexer {
496
500
/// Encountered '<'
497
501
fn tag_opened ( & mut self , c : char ) -> Result {
498
502
match c {
499
- '?' => Ok ( self . move_to_with ( State :: InsideProcessingInstruction , Token :: ProcessingInstructionStart ) ) ,
500
- '/' => Ok ( self . move_to_with ( self . normal_state , Token :: ClosingTagStart ) ) ,
503
+ '?' => Ok ( Some ( self . move_to_with ( State :: InsideProcessingInstruction , Token :: ProcessingInstructionStart ) ) ) ,
504
+ '/' => Ok ( Some ( self . move_to_with ( self . normal_state , Token :: ClosingTagStart ) ) ) ,
501
505
'!' => Ok ( self . move_to ( State :: CommentOrCDataOrDoctypeStarted ) ) ,
502
- _ if is_whitespace_char ( c) => Ok ( self . move_to_with_unread ( self . normal_state , & [ c] , Token :: OpeningTagStart ) ) ,
503
- _ if is_name_char ( c) => Ok ( self . move_to_with_unread ( self . normal_state , & [ c] , Token :: OpeningTagStart ) ) ,
506
+ _ if is_whitespace_char ( c) => Ok ( Some ( self . move_to_with_unread ( self . normal_state , & [ c] , Token :: OpeningTagStart ) ) ) ,
507
+ _ if is_name_char ( c) => Ok ( Some ( self . move_to_with_unread ( self . normal_state , & [ c] , Token :: OpeningTagStart ) ) ) ,
504
508
_ => self . handle_error ( "<" , c)
505
509
}
506
510
}
@@ -512,7 +516,7 @@ impl Lexer {
512
516
'[' => Ok ( self . move_to ( State :: CDataStarted ( CDataStartedSubstate :: E ) ) ) ,
513
517
'D' => Ok ( self . move_to ( State :: DoctypeStarted ( DoctypeStartedSubstate :: D ) ) ) ,
514
518
'E' | 'A' | 'N' if matches ! ( self . normal_state, State :: InsideDoctype ) => {
515
- Ok ( self . move_to_with_unread ( State :: InsideMarkupDeclaration , & [ c] , Token :: MarkupDeclarationStart ) )
519
+ Ok ( Some ( self . move_to_with_unread ( State :: InsideMarkupDeclaration , & [ c] , Token :: MarkupDeclarationStart ) ) )
516
520
} ,
517
521
_ => self . handle_error ( "<!" , c) ,
518
522
}
@@ -521,7 +525,7 @@ impl Lexer {
521
525
/// Encountered '<!-'
522
526
fn comment_started ( & mut self , c : char ) -> Result {
523
527
match c {
524
- '-' => Ok ( self . move_to_with ( State :: InsideComment , Token :: CommentStart ) ) ,
528
+ '-' => Ok ( Some ( self . move_to_with ( State :: InsideComment , Token :: CommentStart ) ) ) ,
525
529
_ => self . handle_error ( "<!-" , c) ,
526
530
}
527
531
}
@@ -535,28 +539,28 @@ impl Lexer {
535
539
CD ; 'A' ; CDA ; "<![CD" ,
536
540
CDA ; 'T' ; CDAT ; "<![CDA" ,
537
541
CDAT ; 'A' ; CDATA ; "<![CDAT" ;
538
- CDATA ; '[' ; "<![CDATA" ; Ok ( self . move_to_with( State :: InsideCdata , Token :: CDataStart ) )
542
+ CDATA ; '[' ; "<![CDATA" ; Ok ( Some ( self . move_to_with( State :: InsideCdata , Token :: CDataStart ) ) )
539
543
)
540
544
}
541
545
542
546
/// Encountered '<!…' that isn't DOCTYPE or CDATA
543
547
fn markup_declaration ( & mut self , c : char ) -> Result {
544
548
match c {
545
549
'<' => self . handle_error ( "<!" , c) ,
546
- '>' => Ok ( self . move_to_with ( self . normal_state , Token :: TagEnd ) ) ,
550
+ '>' => Ok ( Some ( self . move_to_with ( self . normal_state , Token :: TagEnd ) ) ) ,
547
551
'&' => Ok ( Some ( Token :: ReferenceStart ) ) ,
548
552
';' => Ok ( Some ( Token :: ReferenceEnd ) ) ,
549
- '"' => Ok ( self . move_to_with ( State :: InsideMarkupDeclarationQuotedString ( QuoteStyle :: Double ) , Token :: DoubleQuote ) ) ,
550
- '\'' => Ok ( self . move_to_with ( State :: InsideMarkupDeclarationQuotedString ( QuoteStyle :: Single ) , Token :: SingleQuote ) ) ,
553
+ '"' => Ok ( Some ( self . move_to_with ( State :: InsideMarkupDeclarationQuotedString ( QuoteStyle :: Double ) , Token :: DoubleQuote ) ) ) ,
554
+ '\'' => Ok ( Some ( self . move_to_with ( State :: InsideMarkupDeclarationQuotedString ( QuoteStyle :: Single ) , Token :: SingleQuote ) ) ) ,
551
555
_ => Ok ( Some ( Token :: Character ( c) ) ) ,
552
556
}
553
557
}
554
558
555
- fn markup_declaration_string ( & mut self , c : char , q : QuoteStyle ) -> Option < Token > {
559
+ fn markup_declaration_string ( & mut self , c : char , q : QuoteStyle ) -> Token {
556
560
match c {
557
561
'"' if q == QuoteStyle :: Double => self . move_to_with ( State :: InsideMarkupDeclaration , Token :: DoubleQuote ) ,
558
562
'\'' if q == QuoteStyle :: Single => self . move_to_with ( State :: InsideMarkupDeclaration , Token :: SingleQuote ) ,
559
- _ => Some ( Token :: Character ( c) ) ,
563
+ _ => Token :: Character ( c) ,
560
564
}
561
565
}
562
566
@@ -569,14 +573,14 @@ impl Lexer {
569
573
DOC ; 'T' ; DOCT ; "<!DOC" ,
570
574
DOCT ; 'Y' ; DOCTY ; "<!DOCT" ,
571
575
DOCTY ; 'P' ; DOCTYP ; "<!DOCTY" ;
572
- DOCTYP ; 'E' ; "<!DOCTYP" ; Ok ( self . move_to_and_reset_normal( State :: InsideDoctype , Token :: DoctypeStart ) )
576
+ DOCTYP ; 'E' ; "<!DOCTYP" ; Ok ( Some ( self . move_to_and_reset_normal( State :: InsideDoctype , Token :: DoctypeStart ) ) )
573
577
)
574
578
}
575
579
576
580
/// State used while awaiting the closing bracket for the <!DOCTYPE tag
577
581
fn inside_doctype ( & mut self , c : char ) -> Option < Token > {
578
582
match c {
579
- '>' => self . move_to_and_reset_normal ( State :: Normal , Token :: TagEnd ) ,
583
+ '>' => Some ( self . move_to_and_reset_normal ( State :: Normal , Token :: TagEnd ) ) ,
580
584
'<' => self . move_to ( State :: TagStarted ) ,
581
585
'&' => Some ( Token :: ReferenceStart ) ,
582
586
';' => Some ( Token :: ReferenceEnd ) ,
@@ -587,15 +591,15 @@ impl Lexer {
587
591
}
588
592
589
593
/// Encountered '?'
590
- fn processing_instruction_closing ( & mut self , c : char ) -> Option < Token > {
594
+ fn processing_instruction_closing ( & mut self , c : char ) -> Token {
591
595
match c {
592
596
'>' => self . move_to_with ( self . normal_state , Token :: ProcessingInstructionEnd ) ,
593
597
_ => self . move_to_with_unread ( State :: InsideProcessingInstruction , & [ c] , Token :: Character ( '?' ) ) ,
594
598
}
595
599
}
596
600
597
601
/// Encountered '/'
598
- fn empty_element_closing ( & mut self , c : char ) -> Option < Token > {
602
+ fn empty_element_closing ( & mut self , c : char ) -> Token {
599
603
match c {
600
604
'>' => self . move_to_with ( self . normal_state , Token :: EmptyTagEnd ) ,
601
605
_ => self . move_to_with_unread ( self . normal_state , & [ c] , Token :: Character ( '/' ) ) ,
@@ -607,10 +611,10 @@ impl Lexer {
607
611
match s {
608
612
ClosingSubstate :: First => match c {
609
613
'-' => Ok ( self . move_to ( State :: CommentClosing ( ClosingSubstate :: Second ) ) ) ,
610
- _ => Ok ( self . move_to_with_unread ( State :: InsideComment , & [ c] , Token :: Character ( '-' ) ) ) ,
614
+ _ => Ok ( Some ( self . move_to_with_unread ( State :: InsideComment , & [ c] , Token :: Character ( '-' ) ) ) ) ,
611
615
} ,
612
616
ClosingSubstate :: Second => match c {
613
- '>' => Ok ( self . move_to_with ( self . normal_state , Token :: CommentEnd ) ) ,
617
+ '>' => Ok ( Some ( self . move_to_with ( self . normal_state , Token :: CommentEnd ) ) ) ,
614
618
// double dash not followed by a greater-than is a hard error inside comment
615
619
_ => self . handle_error ( "--" , c) ,
616
620
} ,
@@ -622,11 +626,11 @@ impl Lexer {
622
626
match s {
623
627
ClosingSubstate :: First => match c {
624
628
']' => self . move_to ( State :: CDataClosing ( ClosingSubstate :: Second ) ) ,
625
- _ => self . move_to_with_unread ( State :: InsideCdata , & [ c] , Token :: Character ( ']' ) ) ,
629
+ _ => Some ( self . move_to_with_unread ( State :: InsideCdata , & [ c] , Token :: Character ( ']' ) ) ) ,
626
630
} ,
627
631
ClosingSubstate :: Second => match c {
628
- '>' => self . move_to_with ( State :: Normal , Token :: CDataEnd ) ,
629
- _ => self . move_to_with_unread ( State :: InsideCdata , & [ ']' , c] , Token :: Character ( ']' ) ) ,
632
+ '>' => Some ( self . move_to_with ( State :: Normal , Token :: CDataEnd ) ) ,
633
+ _ => Some ( self . move_to_with_unread ( State :: InsideCdata , & [ ']' , c] , Token :: Character ( ']' ) ) ) ,
630
634
} ,
631
635
}
632
636
}
@@ -636,11 +640,11 @@ impl Lexer {
636
640
match s {
637
641
ClosingSubstate :: First => match c {
638
642
']' => self . move_to ( State :: InvalidCDataClosing ( ClosingSubstate :: Second ) ) ,
639
- _ => self . move_to_with_unread ( State :: Normal , & [ c] , Token :: Character ( ']' ) ) ,
643
+ _ => Some ( self . move_to_with_unread ( State :: Normal , & [ c] , Token :: Character ( ']' ) ) ) ,
640
644
} ,
641
645
ClosingSubstate :: Second => match c {
642
- '>' => self . move_to_with ( self . normal_state , Token :: CDataEnd ) ,
643
- _ => self . move_to_with_unread ( State :: Normal , & [ ']' , c] , Token :: Character ( ']' ) ) ,
646
+ '>' => Some ( self . move_to_with ( self . normal_state , Token :: CDataEnd ) ) ,
647
+ _ => Some ( self . move_to_with_unread ( State :: Normal , & [ ']' , c] , Token :: Character ( ']' ) ) ) ,
644
648
} ,
645
649
}
646
650
}
0 commit comments