@@ -105,6 +105,21 @@ pub struct Token {
     pub end_pos: usize,
 }
 
+impl AsRef<Token> for Token {
+    fn as_ref(&self) -> &Token {
+        self
+    }
+}
+
+impl From<&Token> for SrcSpan {
+    fn from(value: &Token) -> Self {
+        SrcSpan {
+            start: value.start_pos,
+            end: value.end_pos,
+        }
+    }
+}
+
 pub struct Lexer<T: Iterator<Item = char>> {
     istream: T,
     ostream: Vec<Token>,
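The `From<&Token>` impl added above lets error sites derive a `SrcSpan` directly from a token. Below is a minimal, self-contained sketch of how the conversion reads at a call site; the `Token` and `SrcSpan` stand-ins only mirror the fields used here and are not the crate's full definitions.

```rust
#[derive(Debug, PartialEq)]
struct SrcSpan {
    start: usize,
    end: usize,
}

// Stand-in carrying only the positional fields the conversion touches.
struct Token {
    start_pos: usize,
    end_pos: usize,
}

impl From<&Token> for SrcSpan {
    fn from(value: &Token) -> Self {
        SrcSpan {
            start: value.start_pos,
            end: value.end_pos,
        }
    }
}

fn main() {
    let tok = Token { start_pos: 3, end_pos: 7 };
    // An error location can now be built straight from the offending token.
    let location: SrcSpan = (&tok).into();
    assert_eq!(location, SrcSpan { start: 3, end: 7 });
}
```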
@@ -237,21 +252,17 @@ where
         self.ostream.push(token);
     }
 
-    fn space(&mut self) {
-        while let Some(c) = self.peek() {
-            match c {
-                ' ' | '\t' | '\n' => {
-                    let _ = self.next_char();
-                }
-                _ => break,
-            }
+    fn is_space(&mut self, chr: char) -> bool {
+        match chr {
+            ' ' | '\t' | '\n' => true,
+            _ => false,
         }
     }
 
-    fn separator(&mut self) {
-        if let Some(';') = self.peek() {
-            self.add_tok1(TokenType::Semicolon);
-            self.next_char();
+    fn is_separator(&mut self, chr: char) -> bool {
+        match chr {
+            ';' => true,
+            _ => false,
         }
     }
 
@@ -417,13 +428,14 @@ where
         let mut literal = String::new();
         let start_pos = self.get_pos();
         loop {
-            match self.next_char() {
+            match self.peek() {
                 Some('\'') => {
                     break;
                 },
                 Some(chr) => {
                     // may cause reallocations, but it's not really important
                     literal.push(chr);
+                    self.next_char();
                 },
                 None => return Err(LexerError {
                     location: SrcSpan { start: start_pos, end: self.get_pos() },
@@ -523,13 +535,23 @@ where
             '+' => self.add_tok1(TokenType::Plus),
             '-' => self.add_tok1(TokenType::Minus),
             '*' => self.add_tok1(TokenType::Star),
-            '/' => self.add_tok1(TokenType::Slash),
             '(' => self.add_tok1(TokenType::LeftParen),
             ')' => self.add_tok1(TokenType::RightParen),
             '[' => self.add_tok1(TokenType::LeftBrack),
             ']' => self.add_tok1(TokenType::RightBrack),
             '=' => self.add_tok1(TokenType::Equal),
             ',' => self.add_tok1(TokenType::Comma),
+            '/' => {
+                self.next_char();
+                match self.peek() {
+                    Some('/') => {
+                        self.next_char();
+                        let comment = self.lex_comment();
+                        self.ostream.push(comment);
+                    },
+                    _ => self.add_tok1(TokenType::Slash),
+                }
+            },
             '<' => {
                 self.next_char();
                 match self.peek() {
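The new `'/'` arm is a one-character lookahead: consume the first `/`, peek at the next character, and either hand the rest of the line to the comment lexer or emit a plain `Slash`. The sketch below illustrates that lookahead pattern in isolation over a `Peekable<Chars>`; the `SlashKind` enum and `lex_slash` helper are illustrative only, not part of this crate.

```rust
use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug, PartialEq)]
enum SlashKind {
    // A lone '/': the division operator.
    Slash,
    // "//" followed by the rest of the line.
    LineComment(String),
}

// Called with the cursor positioned on a '/'.
fn lex_slash(chars: &mut Peekable<Chars<'_>>) -> SlashKind {
    chars.next(); // consume the first '/'
    if chars.peek() == Some(&'/') {
        chars.next(); // consume the second '/'
        // Everything up to the end of the line belongs to the comment.
        let text: String = chars.take_while(|&c| c != '\n').collect();
        SlashKind::LineComment(text)
    } else {
        SlashKind::Slash
    }
}

fn main() {
    assert_eq!(lex_slash(&mut "/ 2".chars().peekable()), SlashKind::Slash);
    assert_eq!(
        lex_slash(&mut "// note\nx".chars().peekable()),
        SlashKind::LineComment(" note".to_string())
    );
}
```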
@@ -566,7 +588,8 @@ where
                     _ => self.add_tok1(TokenType::Bleat),
                 }
             },
-            '"' => {
+            '\'' => {
+                self.next_char();
                 let string = self.lex_string_literal()?;
                 self.ostream.push(string);
             },
@@ -586,17 +609,20 @@ where
 
     fn consume_normal(&mut self) -> Result<()> {
         if let Some(chr) = self.chr0 {
-            self.space();
-            self.separator();
-
-            if self.is_ident_start(chr) {
+            if self.is_space(chr) {
+                self.next_char();
+            } else if self.is_separator(chr) {
+                self.add_tok1(TokenType::Semicolon);
+                self.next_char();
+            } else if self.is_ident_start(chr) {
                 let name = self.lex_ident()?;
                 self.add_tok(name);
             } else if self.is_number_start(chr, self.chr1) {
                 let num = self.lex_number()?;
                 self.add_tok(num);
             } else {
                 self.consume_character(chr)?;
+                self.next_char();
             }
         } else {
             self.add_tok1(TokenType::Eof);
@@ -615,15 +641,15 @@ where
 
 impl<T> Iterator for Lexer<T>
 where
-    T: Iterator<Item = char,
+    T: Iterator<Item = char>,
 {
     type Item = Result<Token>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        let token = self.lex();
+        let token = self.inner_next();
 
         match token {
-            Ok((_, Token::Eof, _)) => None,
+            Ok(Token { kind: TokenType::Eof, .. }) => None,
            r => Some(r),
        }
    }
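With `next` returning `None` on `Eof` and `Some(Err(..))` on lexer failures, callers can drive the lexer with ordinary iterator adapters and surface the first error through `collect::<Result<_, _>>`, as the updated `lex_arith` test below does. A self-contained sketch of that shape follows; the `Tiny` iterator and its types are stand-ins, not the crate's `Lexer`.

```rust
#[derive(Debug, PartialEq)]
enum TokenType { Plus, Eof }

#[derive(Debug)]
struct Token { kind: TokenType }

// Stand-in that yields a fixed token stream and stops at Eof,
// mirroring the shape of the Iterator impl above.
struct Tiny { toks: std::vec::IntoIter<Token> }

impl Iterator for Tiny {
    type Item = Result<Token, String>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.toks.next() {
            // Eof terminates iteration instead of being emitted.
            Some(Token { kind: TokenType::Eof }) => None,
            Some(t) => Some(Ok(t)),
            None => None,
        }
    }
}

fn main() {
    let lexer = Tiny {
        toks: vec![
            Token { kind: TokenType::Plus },
            Token { kind: TokenType::Eof },
        ]
        .into_iter(),
    };
    // Any Err short-circuits the collect; Eof never shows up in the output.
    let kinds: Vec<TokenType> = lexer
        .collect::<Result<Vec<_>, _>>()
        .unwrap()
        .into_iter()
        .map(|t| t.kind)
        .collect();
    assert_eq!(kinds, vec![TokenType::Plus]);
}
```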
@@ -635,11 +661,15 @@ mod tests {
 
     #[test]
     fn lex_arith() {
-        let mut lexer = Lexer::new("y = x / (1 + 2) * 4".chars());
+        let lexer = Lexer::new("y = x / (1 + 2) * 4".chars());
+        let result = lexer.into_iter().collect::<Vec<_>>();
+        let result = result.into_iter()
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap().into_iter()
+            .map(|tok| tok.kind)
+            .collect::<Vec<_>>();
         assert_eq!(
-            lexer.lex().unwrap()
-                .into_iter()
-                .map(|token| token.kind).collect::<Vec<_>>(),
+            result,
             vec![
                 TokenType::Identifier(String::from("y")),
                 TokenType::Assignment,