@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
19
19
use std:: ptr:: NonNull ;
20
20
21
21
pub use self :: bindings:: * ;
22
- use ruby_prism_sys:: { pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t} ;
22
+ use ruby_prism_sys:: {
23
+ pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_read, pm_options_t, pm_options_version_t,
24
+ pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25
+ } ;
23
26
24
27
/// A range in the source file.
25
28
pub struct Location < ' pr > {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428
431
source : & ' pr [ u8 ] ,
429
432
parser : NonNull < pm_parser_t > ,
430
433
node : NonNull < pm_node_t > ,
434
+ options_string : Vec < u8 > ,
435
+ options : NonNull < pm_options_t > ,
431
436
}
432
437
433
438
impl < ' pr > ParseResult < ' pr > {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529
534
pub fn node ( & self ) -> Node < ' _ > {
530
535
Node :: new ( self . parser , self . node . as_ptr ( ) )
531
536
}
537
+
538
+ /// Returns the serialized representation of the parse result.
539
+ #[ must_use]
540
+ pub fn serialize ( & self ) -> Vec < u8 > {
541
+ let mut buffer = Buffer :: default ( ) ;
542
+ unsafe {
543
+ pm_serialize ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) , & mut buffer. buffer ) ;
544
+ }
545
+ buffer. value ( ) . into ( )
546
+ }
532
547
}
533
548
534
549
impl < ' pr > Drop for ParseResult < ' pr > {
@@ -537,10 +552,177 @@ impl<'pr> Drop for ParseResult<'pr> {
537
552
pm_node_destroy ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) ) ;
538
553
pm_parser_free ( self . parser . as_ptr ( ) ) ;
539
554
drop ( Box :: from_raw ( self . parser . as_ptr ( ) ) ) ;
555
+
556
+ _ = self . options ;
557
+ _ = self . options_string ;
558
+
559
+ // pm_options_free(self.options.as_ptr());
560
+ // drop(Box::from_raw(self.options.as_ptr()));
561
+ }
562
+ }
563
+ }
564
+
565
/// A scope of locals surrounding the code that is being parsed.
#[derive(Debug, Default, Clone)]
pub struct OptionsScope {
    /// Flags for the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// The names of the locals in the scope.
    pub locals: Vec<String>,
}
575
+
576
+ /// The options that can be passed to the parser.
577
+ #[ derive( Debug , Clone ) ]
578
+ pub struct Options {
579
+ /** The name of the file that is currently being parsed. */
580
+ pub filepath : String ,
581
+ /**
582
+ * The line within the file that the parse starts on. This value is
583
+ * 1-indexed.
584
+ */
585
+ pub line : i32 ,
586
+ /**
587
+ * The name of the encoding that the source file is in. Note that this must
588
+ * correspond to a name that can be found with Encoding.find in Ruby.
589
+ */
590
+ pub encoding : String ,
591
+ /**
592
+ * Whether or not the frozen string literal option has been set.
593
+ * May be:
594
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
595
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
596
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
597
+ */
598
+ pub frozen_string_literal : Option < bool > ,
599
+ /** A bitset of the various options that were set on the command line. */
600
+ pub command_line : u8 ,
601
+ /**
602
+ * The version of prism that we should be parsing with. This is used to
603
+ * allow consumers to specify which behavior they want in case they need to
604
+ * parse exactly as a specific version of CRuby.
605
+ */
606
+ pub version : pm_options_version_t ,
607
+ /**
608
+ * Whether or not the encoding magic comments should be respected. This is a
609
+ * niche use-case where you want to parse a file with a specific encoding
610
+ * but ignore any encoding magic comments at the top of the file.
611
+ */
612
+ pub encoding_locked : bool ,
613
+ /**
614
+ * When the file being parsed is the main script, the shebang will be
615
+ * considered for command-line flags (or for implicit -x). The caller needs
616
+ * to pass this information to the parser so that it can behave correctly.
617
+ */
618
+ pub main_script : bool ,
619
+ /**
620
+ * When the file being parsed is considered a "partial" script, jumps will
621
+ * not be marked as errors if they are not contained within loops/blocks.
622
+ * This is used in the case that you're parsing a script that you know will
623
+ * be embedded inside another script later, but you do not have that context
624
+ * yet. For example, when parsing an ERB template that will be evaluated
625
+ * inside another script.
626
+ */
627
+ pub partial_script : bool ,
628
+ /**
629
+ * Whether or not the parser should freeze the nodes that it creates. This
630
+ * makes it possible to have a deeply frozen AST that is safe to share
631
+ * between concurrency primitives.
632
+ */
633
+ pub freeze : bool ,
634
+ /**
635
+ * The scopes surrounding the code that is being parsed. For most parses
636
+ * this will be empty, but for evals it will be the locals that are in scope
637
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
638
+ * innermost one.
639
+ */
640
+ pub scopes : Vec < OptionsScope > ,
641
+ }
642
+
643
+ impl Default for Options {
644
+ fn default ( ) -> Self {
645
+ Self {
646
+ filepath : String :: new ( ) ,
647
+ line : 1 ,
648
+ encoding : String :: new ( ) ,
649
+ frozen_string_literal : None ,
650
+ command_line : 0 ,
651
+ version : pm_options_version_t:: PM_OPTIONS_VERSION_LATEST ,
652
+ encoding_locked : false ,
653
+ main_script : true ,
654
+ partial_script : false ,
655
+ freeze : false ,
656
+ scopes : Vec :: new ( ) ,
540
657
}
541
658
}
542
659
}
543
660
661
+ impl Options {
662
+ fn to_binary_string ( & self ) -> Vec < u8 > {
663
+ let mut output = Vec :: new ( ) ;
664
+
665
+ output. extend ( ( self . filepath . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
666
+ output. extend ( self . filepath . as_bytes ( ) ) ;
667
+ output. extend ( self . line . to_ne_bytes ( ) ) ;
668
+ output. extend ( ( self . encoding . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
669
+ output. extend ( self . encoding . as_bytes ( ) ) ;
670
+ output. extend ( self . frozen_string_literal . map_or_else ( || 0i8 , |frozen| if frozen { 1 } else { -1 } ) . to_ne_bytes ( ) ) ;
671
+ output. push ( self . command_line ) ;
672
+ output. extend ( ( self . version as u8 ) . to_ne_bytes ( ) ) ;
673
+ output. push ( self . encoding_locked . into ( ) ) ;
674
+ output. push ( self . main_script . into ( ) ) ;
675
+ output. push ( self . partial_script . into ( ) ) ;
676
+ output. push ( self . freeze . into ( ) ) ;
677
+ output. extend ( ( self . scopes . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
678
+ for scope in & self . scopes {
679
+ output. extend ( ( scope. locals . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
680
+ output. extend ( scope. forwarding_flags . to_ne_bytes ( ) ) ;
681
+ for local in & scope. locals {
682
+ output. extend ( ( local. len ( ) as u32 ) . to_ne_bytes ( ) ) ;
683
+ output. extend ( local. as_bytes ( ) ) ;
684
+ }
685
+ }
686
+ output
687
+ }
688
+ }
689
+
690
+ struct Buffer {
691
+ buffer : pm_buffer_t ,
692
+ }
693
+
694
+ impl Default for Buffer {
695
+ fn default ( ) -> Self {
696
+ let buffer = unsafe {
697
+ let mut uninit = MaybeUninit :: < pm_buffer_t > :: uninit ( ) ;
698
+ let initialized = pm_buffer_init ( uninit. as_mut_ptr ( ) ) ;
699
+ assert ! ( initialized) ;
700
+ uninit. assume_init ( )
701
+ } ;
702
+ Self { buffer }
703
+ }
704
+ }
705
+
706
+ impl Buffer {
707
+ fn length ( & self ) -> usize {
708
+ unsafe { pm_buffer_length ( & self . buffer ) }
709
+ }
710
+
711
+ fn value ( & self ) -> & [ u8 ] {
712
+ unsafe {
713
+ let value = pm_buffer_value ( & self . buffer ) ;
714
+ let value = value. cast :: < u8 > ( ) . cast_const ( ) ;
715
+ std:: slice:: from_raw_parts ( value, self . length ( ) )
716
+ }
717
+ }
718
+ }
719
+
720
+ impl Drop for Buffer {
721
+ fn drop ( & mut self ) {
722
+ unsafe { pm_buffer_free ( & mut self . buffer ) }
723
+ }
724
+ }
725
+
544
726
/// Parses the given source string and returns a parse result.
545
727
///
546
728
/// # Panics
@@ -549,25 +731,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549
731
///
550
732
#[ must_use]
551
733
pub fn parse ( source : & [ u8 ] ) -> ParseResult < ' _ > {
734
+ parse_with_options ( source, & Options :: default ( ) )
735
+ }
736
+
737
+ /// Parses the given source string and returns a parse result.
738
+ ///
739
+ /// # Panics
740
+ ///
741
+ /// Panics if the parser fails to initialize.
742
+ ///
743
+ #[ must_use]
744
+ pub fn parse_with_options < ' pr > ( source : & ' pr [ u8 ] , options : & Options ) -> ParseResult < ' pr > {
745
+ let options_string = options. to_binary_string ( ) ;
552
746
unsafe {
553
747
let uninit = Box :: new ( MaybeUninit :: < pm_parser_t > :: uninit ( ) ) ;
554
748
let uninit = Box :: into_raw ( uninit) ;
555
749
556
- pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , std:: ptr:: null ( ) ) ;
750
+ let options = Box :: into_raw ( Box :: new ( MaybeUninit :: < pm_options_t > :: zeroed ( ) ) ) ;
751
+ pm_options_read ( ( * options) . as_mut_ptr ( ) , options_string. as_ptr ( ) . cast ( ) ) ;
752
+ let options = NonNull :: new ( ( * options) . assume_init_mut ( ) ) . unwrap ( ) ;
753
+
754
+ pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , options. as_ptr ( ) ) ;
557
755
558
756
let parser = ( * uninit) . assume_init_mut ( ) ;
559
757
let parser = NonNull :: new_unchecked ( parser) ;
560
758
561
759
let node = pm_parse ( parser. as_ptr ( ) ) ;
562
760
let node = NonNull :: new_unchecked ( node) ;
563
761
564
- ParseResult { source, parser, node }
762
+ ParseResult { source, parser, node, options_string, options }
763
+ }
764
+ }
765
+
766
+ /// Serializes the given source string and returns a parse result.
767
+ ///
768
+ /// # Panics
769
+ ///
770
+ /// Panics if the parser fails to initialize.
771
+ #[ must_use]
772
+ pub fn serialize_parse ( source : & [ u8 ] , options : & Options ) -> Vec < u8 > {
773
+ let mut buffer = Buffer :: default ( ) ;
774
+ let opts = options. to_binary_string ( ) ;
775
+ unsafe {
776
+ pm_serialize_parse ( & mut buffer. buffer , source. as_ptr ( ) , source. len ( ) , opts. as_ptr ( ) . cast ( ) ) ;
565
777
}
778
+ buffer. value ( ) . into ( )
566
779
}
567
780
568
781
#[ cfg( test) ]
569
782
mod tests {
570
- use super :: parse;
783
+ use super :: { parse, parse_with_options , serialize_parse } ;
571
784
572
785
#[ test]
573
786
fn comments_test ( ) {
@@ -1157,6 +1370,29 @@ end
1157
1370
assert ! ( ( value - 1.0 ) . abs( ) < f64 :: EPSILON ) ;
1158
1371
}
1159
1372
1373
+ #[ test]
1374
+ fn serialize_parse_test ( ) {
// Parses `__FILE__` with a custom file path and checks that the one-shot
// `serialize_parse` output matches parsing first and serializing afterwards.
1375
+ let source = r#"__FILE__"# ;
1376
+ let mut options = crate :: Options :: default ( ) ;
1377
+ options. filepath = "test.rb" . to_string ( ) ;
1378
+ let bytes = serialize_parse ( source. as_ref ( ) , & options) ;
1379
+
1380
+ let result = parse_with_options ( source. as_bytes ( ) , & options) ;
1381
+
// Both routes must produce identical bytes.
1382
+ assert_eq ! ( bytes, result. serialize( ) ) ;
1383
+
// The pretty-printed tree must show the file path supplied through the options.
1384
+ let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1385
+ +-- locals: []
1386
+ +-- statements:
1387
+ @ StatementsNode (location: (1,0)-(1,8))
1388
+ +-- body: (length: 1)
1389
+ +-- @ SourceFileNode (location: (1,0)-(1,8))
1390
+ +-- StringFlags: nil
1391
+ +-- filepath: "test.rb"
1392
+ "# ;
1393
+ assert_eq ! ( expected, result. node( ) . pretty_print( ) . as_str( ) )
1394
+ }
1395
+
1160
1396
#[ test]
1161
1397
fn node_field_lifetime_test ( ) {
1162
1398
// The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,
0 commit comments