Skip to content

Commit ffece4f

Browse files
committed
Expose prettyprint/serialize/parse options in rust
Signed-off-by: Samuel Giddins <[email protected]>
1 parent a1e7c89 commit ffece4f

File tree

3 files changed

+277
-4
lines changed

3 files changed

+277
-4
lines changed

rust/ruby-prism-sys/build/main.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
128128
// Enums
129129
.rustified_non_exhaustive_enum("pm_comment_type_t")
130130
.rustified_non_exhaustive_enum(r"pm_\w+_flags")
131+
.rustified_non_exhaustive_enum("pm_options_version_t")
131132
.rustified_non_exhaustive_enum("pm_node_type")
132133
.rustified_non_exhaustive_enum("pm_pack_encoding")
133134
.rustified_non_exhaustive_enum("pm_pack_endian")
@@ -138,20 +139,32 @@ fn generate_bindings(ruby_include_path: &Path) -> bindgen::Bindings {
138139
.rustified_non_exhaustive_enum("pm_pack_type")
139140
.rustified_non_exhaustive_enum("pm_pack_variant")
140141
// Functions
142+
.allowlist_function("pm_buffer_free")
143+
.allowlist_function("pm_buffer_init")
144+
.allowlist_function("pm_buffer_length")
145+
.allowlist_function("pm_buffer_value")
141146
.allowlist_function("pm_list_empty_p")
142147
.allowlist_function("pm_list_free")
143148
.allowlist_function("pm_node_destroy")
149+
.allowlist_function("pm_options_free")
150+
.allowlist_function("pm_options_read")
144151
.allowlist_function("pm_pack_parse")
145152
.allowlist_function("pm_parse")
146153
.allowlist_function("pm_parser_free")
147154
.allowlist_function("pm_parser_init")
155+
.allowlist_function("pm_prettyprint")
156+
.allowlist_function("pm_serialize_parse")
157+
.allowlist_function("pm_serialize")
148158
.allowlist_function("pm_size_to_native")
159+
.allowlist_function("pm_string_ensure_owned")
149160
.allowlist_function("pm_string_free")
150161
.allowlist_function("pm_string_length")
162+
.allowlist_function("pm_string_shared_init")
151163
.allowlist_function("pm_string_source")
152164
.allowlist_function("pm_version")
153165
// Vars
154166
.allowlist_var(r"^pm_encoding\S+")
167+
.allowlist_var(r"^PM_OPTIONS_COMMAND\S+")
155168
.generate()
156169
.expect("Unable to generate prism bindings")
157170
}

rust/ruby-prism/build.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,30 @@ impl<'pr> Node<'pr> {{
616616
writeln!(file, " }}")?;
617617
writeln!(file)?;
618618

619+
// Emit `Node::pretty_print`: one match arm per configured node type, each
// forwarding the variant's parser and node pointers to `pm_prettyprint`, which
// writes into a temporary `crate::Buffer`.
writeln!(
    file,
    r#"
    /// Returns a pretty-printed representation of this node.
    #[must_use]
    pub fn pretty_print(&self) -> String {{
        let mut buffer = crate::Buffer::default();
        unsafe {{
            match *self {{
"#
)?;
for node in &config.nodes {
    writeln!(file, "                Self::{} {{ pointer, parser, .. }} => pm_prettyprint(&mut buffer.buffer, parser.as_ptr(), pointer.cast()),", node.name)?;
}
// The conversion to `String` is lossy rather than `from_utf8(..).unwrap()` so
// the generated method cannot panic should the rendered bytes not be valid
// UTF-8; for valid UTF-8 the result is unchanged.
writeln!(
    file,
    r#"
            }}
        }}
        String::from_utf8_lossy(buffer.value()).into_owned()
    }}
"#
)?;
642+
619643
writeln!(file, " /// Returns the location of this node.")?;
620644
writeln!(file, " #[must_use]")?;
621645
writeln!(file, " pub fn location(&self) -> Location<'pr> {{")?;

rust/ruby-prism/src/lib.rs

Lines changed: 240 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
1919
use std::ptr::NonNull;
2020

2121
pub use self::bindings::*;
22-
use ruby_prism_sys::{pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t};
22+
use ruby_prism_sys::{
23+
pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_read, pm_options_t, pm_options_version_t,
24+
pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25+
};
2326

2427
/// A range in the source file.
2528
pub struct Location<'pr> {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428431
source: &'pr [u8],
429432
parser: NonNull<pm_parser_t>,
430433
node: NonNull<pm_node_t>,
434+
options_string: Vec<u8>,
435+
options: NonNull<pm_options_t>,
431436
}
432437

433438
impl<'pr> ParseResult<'pr> {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529534
pub fn node(&self) -> Node<'_> {
530535
Node::new(self.parser, self.node.as_ptr())
531536
}
537+
538+
/// Returns the serialized representation of the parse result.
539+
#[must_use]
540+
pub fn serialize(&self) -> Vec<u8> {
541+
let mut buffer = Buffer::default();
542+
unsafe {
543+
pm_serialize(self.parser.as_ptr(), self.node.as_ptr(), &mut buffer.buffer);
544+
}
545+
buffer.value().into()
546+
}
532547
}
533548

534549
impl<'pr> Drop for ParseResult<'pr> {
@@ -537,10 +552,177 @@ impl<'pr> Drop for ParseResult<'pr> {
537552
pm_node_destroy(self.parser.as_ptr(), self.node.as_ptr());
538553
pm_parser_free(self.parser.as_ptr());
539554
drop(Box::from_raw(self.parser.as_ptr()));
555+
556+
_ = self.options;
557+
_ = self.options_string;
558+
559+
// pm_options_free(self.options.as_ptr());
560+
// drop(Box::from_raw(self.options.as_ptr()));
561+
}
562+
}
563+
}
564+
565+
/// A scope of locals surrounding the code that is being parsed.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct OptionsScope {
    /// Flags for the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// The names of the locals in the scope.
    pub locals: Vec<String>,
}
575+
576+
/// The options that can be passed to the parser.
577+
#[derive(Debug, Clone)]
578+
pub struct Options {
579+
/** The name of the file that is currently being parsed. */
580+
pub filepath: String,
581+
/**
582+
* The line within the file that the parse starts on. This value is
583+
* 1-indexed.
584+
*/
585+
pub line: i32,
586+
/**
587+
* The name of the encoding that the source file is in. Note that this must
588+
* correspond to a name that can be found with Encoding.find in Ruby.
589+
*/
590+
pub encoding: String,
591+
/**
592+
* Whether or not the frozen string literal option has been set.
593+
* May be:
594+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
595+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
596+
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
597+
*/
598+
pub frozen_string_literal: Option<bool>,
599+
/** A bitset of the various options that were set on the command line. */
600+
pub command_line: u8,
601+
/**
602+
* The version of prism that we should be parsing with. This is used to
603+
* allow consumers to specify which behavior they want in case they need to
604+
* parse exactly as a specific version of CRuby.
605+
*/
606+
pub version: pm_options_version_t,
607+
/**
608+
* Whether or not the encoding magic comments should be respected. This is a
609+
* niche use-case where you want to parse a file with a specific encoding
610+
* but ignore any encoding magic comments at the top of the file.
611+
*/
612+
pub encoding_locked: bool,
613+
/**
614+
* When the file being parsed is the main script, the shebang will be
615+
* considered for command-line flags (or for implicit -x). The caller needs
616+
* to pass this information to the parser so that it can behave correctly.
617+
*/
618+
pub main_script: bool,
619+
/**
620+
* When the file being parsed is considered a "partial" script, jumps will
621+
* not be marked as errors if they are not contained within loops/blocks.
622+
* This is used in the case that you're parsing a script that you know will
623+
* be embedded inside another script later, but you do not have that context
624+
* yet. For example, when parsing an ERB template that will be evaluated
625+
* inside another script.
626+
*/
627+
pub partial_script: bool,
628+
/**
629+
* Whether or not the parser should freeze the nodes that it creates. This
630+
* makes it possible to have a deeply frozen AST that is safe to share
631+
* between concurrency primitives.
632+
*/
633+
pub freeze: bool,
634+
/**
635+
* The scopes surrounding the code that is being parsed. For most parses
636+
* this will be empty, but for evals it will be the locals that are in scope
637+
* surrounding the eval. Scopes are ordered from the outermost scope to the
638+
* innermost one.
639+
*/
640+
pub scopes: Vec<OptionsScope>,
641+
}
642+
643+
impl Default for Options {
644+
fn default() -> Self {
645+
Self {
646+
filepath: String::new(),
647+
line: 1,
648+
encoding: String::new(),
649+
frozen_string_literal: None,
650+
command_line: 0,
651+
version: pm_options_version_t::PM_OPTIONS_VERSION_LATEST,
652+
encoding_locked: false,
653+
main_script: true,
654+
partial_script: false,
655+
freeze: false,
656+
scopes: Vec::new(),
540657
}
541658
}
542659
}
543660

661+
impl Options {
662+
fn to_binary_string(&self) -> Vec<u8> {
663+
let mut output = Vec::new();
664+
665+
output.extend((self.filepath.len() as u32).to_ne_bytes());
666+
output.extend(self.filepath.as_bytes());
667+
output.extend(self.line.to_ne_bytes());
668+
output.extend((self.encoding.len() as u32).to_ne_bytes());
669+
output.extend(self.encoding.as_bytes());
670+
output.extend(self.frozen_string_literal.map_or_else(|| 0i8, |frozen| if frozen { 1 } else { -1 }).to_ne_bytes());
671+
output.push(self.command_line);
672+
output.extend((self.version as u8).to_ne_bytes());
673+
output.push(self.encoding_locked.into());
674+
output.push(self.main_script.into());
675+
output.push(self.partial_script.into());
676+
output.push(self.freeze.into());
677+
output.extend((self.scopes.len() as u32).to_ne_bytes());
678+
for scope in &self.scopes {
679+
output.extend((scope.locals.len() as u32).to_ne_bytes());
680+
output.extend(scope.forwarding_flags.to_ne_bytes());
681+
for local in &scope.locals {
682+
output.extend((local.len() as u32).to_ne_bytes());
683+
output.extend(local.as_bytes());
684+
}
685+
}
686+
output
687+
}
688+
}
689+
690+
struct Buffer {
691+
buffer: pm_buffer_t,
692+
}
693+
694+
impl Default for Buffer {
695+
fn default() -> Self {
696+
let buffer = unsafe {
697+
let mut uninit = MaybeUninit::<pm_buffer_t>::uninit();
698+
let initialized = pm_buffer_init(uninit.as_mut_ptr());
699+
assert!(initialized);
700+
uninit.assume_init()
701+
};
702+
Self { buffer }
703+
}
704+
}
705+
706+
impl Buffer {
707+
fn length(&self) -> usize {
708+
unsafe { pm_buffer_length(&self.buffer) }
709+
}
710+
711+
fn value(&self) -> &[u8] {
712+
unsafe {
713+
let value = pm_buffer_value(&self.buffer);
714+
let value = value.cast::<u8>().cast_const();
715+
std::slice::from_raw_parts(value, self.length())
716+
}
717+
}
718+
}
719+
720+
impl Drop for Buffer {
721+
fn drop(&mut self) {
722+
unsafe { pm_buffer_free(&mut self.buffer) }
723+
}
724+
}
725+
544726
/// Parses the given source string and returns a parse result.
545727
///
546728
/// # Panics
@@ -549,25 +731,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549731
///
550732
#[must_use]
551733
pub fn parse(source: &[u8]) -> ParseResult<'_> {
734+
parse_with_options(source, &Options::default())
735+
}
736+
737+
/// Parses the given source string and returns a parse result.
738+
///
739+
/// # Panics
740+
///
741+
/// Panics if the parser fails to initialize.
742+
///
743+
#[must_use]
744+
pub fn parse_with_options<'pr>(source: &'pr [u8], options: &Options) -> ParseResult<'pr> {
745+
let options_string = options.to_binary_string();
552746
unsafe {
553747
let uninit = Box::new(MaybeUninit::<pm_parser_t>::uninit());
554748
let uninit = Box::into_raw(uninit);
555749

556-
pm_parser_init((*uninit).as_mut_ptr(), source.as_ptr(), source.len(), std::ptr::null());
750+
let options = Box::into_raw(Box::new(MaybeUninit::<pm_options_t>::zeroed()));
751+
pm_options_read((*options).as_mut_ptr(), options_string.as_ptr().cast());
752+
let options = NonNull::new((*options).assume_init_mut()).unwrap();
753+
754+
pm_parser_init((*uninit).as_mut_ptr(), source.as_ptr(), source.len(), options.as_ptr());
557755

558756
let parser = (*uninit).assume_init_mut();
559757
let parser = NonNull::new_unchecked(parser);
560758

561759
let node = pm_parse(parser.as_ptr());
562760
let node = NonNull::new_unchecked(node);
563761

564-
ParseResult { source, parser, node }
762+
ParseResult { source, parser, node, options_string, options }
763+
}
764+
}
765+
766+
/// Serializes the given source string and returns a parse result.
767+
///
768+
/// # Panics
769+
///
770+
/// Panics if the parser fails to initialize.
771+
#[must_use]
772+
pub fn serialize_parse(source: &[u8], options: &Options) -> Vec<u8> {
773+
let mut buffer = Buffer::default();
774+
let opts = options.to_binary_string();
775+
unsafe {
776+
pm_serialize_parse(&mut buffer.buffer, source.as_ptr(), source.len(), opts.as_ptr().cast());
565777
}
778+
buffer.value().into()
566779
}
567780

568781
#[cfg(test)]
569782
mod tests {
570-
use super::parse;
783+
use super::{parse, parse_with_options, serialize_parse};
571784

572785
#[test]
573786
fn comments_test() {
@@ -1157,6 +1370,29 @@ end
11571370
assert!((value - 1.0).abs() < f64::EPSILON);
11581371
}
11591372

1373+
// Checks that `serialize_parse` yields the same bytes as calling
// `parse_with_options` with the same source and options and then serializing
// the result, and that the pretty-printed tree matches an expected string
// containing the configured `filepath`.
#[test]
1374+
fn serialize_parse_test() {
1375+
let source = r#"__FILE__"#;
1376+
let mut options = crate::Options::default();
1377+
options.filepath = "test.rb".to_string();
1378+
let bytes = serialize_parse(source.as_ref(), &options);
1379+
1380+
let result = parse_with_options(source.as_bytes(), &options);
1381+
1382+
// Both entry points should produce identical serialized output.
assert_eq!(bytes, result.serialize());
1383+
1384+
let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1385+
+-- locals: []
1386+
+-- statements:
1387+
@ StatementsNode (location: (1,0)-(1,8))
1388+
+-- body: (length: 1)
1389+
+-- @ SourceFileNode (location: (1,0)-(1,8))
1390+
+-- StringFlags: nil
1391+
+-- filepath: "test.rb"
1392+
"#;
1393+
assert_eq!(expected, result.node().pretty_print().as_str())
1394+
}
1395+
11601396
#[test]
11611397
fn node_field_lifetime_test() {
11621398
// The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,

0 commit comments

Comments
 (0)