Skip to content

Commit ae78e3c

Browse files
committed
Type every node field and mark on-error-only types explicitly
* For Loader.java, do not deserialize the AST if there are errors, so then Java nodes only have non-error types for fields.
1 parent 01179aa commit ae78e3c

File tree

11 files changed

+335
-60
lines changed

11 files changed

+335
-60
lines changed

config.yml

Lines changed: 206 additions & 14 deletions
Large diffs are not rendered by default.

docs/parsing_rules.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ Constants in Ruby begin with an upper-case letter. This is followed by any numbe
1212

1313
Most expressions in CRuby are non-void. This means the expression they represent resolves to a value. For example, `1 + 2` is a non-void expression, because it resolves to a method call. Even things like `class Foo; end` is a non-void expression, because it returns the last evaluated expression in the body of the class (or `nil`).
1414

15-
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions. For example, `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo`.
15+
Certain nodes, however, are void expressions, and cannot be combined to form larger expressions.
16+
* `BEGIN {}`, `END {}`, `alias foo bar`, and `undef foo` can only be at a statement position.
17+
* The "jumps": `return`, `break`, `next`, `redo`, `retry` are void expressions.
18+
* `value => pattern` is also considered a void expression.
1619

1720
## Identifiers
1821

rakelib/typecheck.rake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ namespace :typecheck do
5959
--ignore=test/
6060
--ignore=rakelib/
6161
--ignore=Rakefile
62+
--ignore=top-100-gems/
6263
# Treat all files as "typed: true" by default
6364
--typed=true
6465
# Use the typed-override file to revert some files to "typed: false"

rust/ruby-prism/build.rs

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,27 @@ enum NodeFieldType {
4747
Double,
4848
}
4949

50+
#[derive(Debug, Deserialize)]
51+
#[allow(dead_code)]
52+
struct OnErrorType {
53+
#[serde(rename = "on error")]
54+
kind: String,
55+
}
56+
57+
#[derive(Debug, Deserialize)]
58+
#[serde(untagged)]
59+
#[allow(dead_code)]
60+
enum UnionKind {
61+
OnSuccess(String),
62+
OnError(OnErrorType),
63+
}
64+
5065
#[derive(Debug, Deserialize)]
5166
#[serde(untagged)]
67+
#[allow(dead_code)]
5268
enum NodeFieldKind {
5369
Concrete(String),
54-
Union(Vec<String>),
70+
Union(Vec<UnionKind>),
5571
}
5672

5773
#[derive(Debug, Deserialize)]
@@ -126,6 +142,13 @@ fn struct_name(name: &str) -> String {
126142
result
127143
}
128144

145+
fn kind_to_type(kind: &String) -> String {
146+
match kind.as_str() {
147+
"non-void expression" | "pattern expression" | "Node" => String::new(),
148+
_ => kind.to_string(),
149+
}
150+
}
151+
129152
/// Returns the name of the C type from the given node name.
130153
fn type_name(name: &str) -> String {
131154
let mut result = String::with_capacity(8 + name.len());
@@ -263,30 +286,34 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box<d
263286
writeln!(file, " #[must_use]")?;
264287

265288
match field.field_type {
266-
NodeFieldType::Node => {
267-
if let Some(NodeFieldKind::Concrete(kind)) = &field.kind {
289+
NodeFieldType::Node => match &field.kind {
290+
Some(NodeFieldKind::Concrete(raw_kind)) if !kind_to_type(raw_kind).is_empty() => {
291+
let kind = kind_to_type(raw_kind);
268292
writeln!(file, " pub fn {}(&self) -> {}<'pr> {{", field.name, kind)?;
269-
writeln!(file, " let node: *mut pm{}_t = unsafe {{ (*self.pointer).{} }};", struct_name(kind), field.name)?;
293+
writeln!(file, " let node: *mut pm{}_t = unsafe {{ (*self.pointer).{} }};", struct_name(&kind), field.name)?;
270294
writeln!(file, " {} {{ parser: self.parser, pointer: node, marker: PhantomData }}", kind)?;
271295
writeln!(file, " }}")?;
272-
} else {
296+
},
297+
_ => {
273298
writeln!(file, " pub fn {}(&self) -> Node<'pr> {{", field.name)?;
274299
writeln!(file, " let node: *mut pm_node_t = unsafe {{ (*self.pointer).{} }};", field.name)?;
275300
writeln!(file, " Node::new(self.parser, node)")?;
276301
writeln!(file, " }}")?;
277-
}
302+
},
278303
},
279-
NodeFieldType::OptionalNode => {
280-
if let Some(NodeFieldKind::Concrete(kind)) = &field.kind {
304+
NodeFieldType::OptionalNode => match &field.kind {
305+
Some(NodeFieldKind::Concrete(raw_kind)) if !kind_to_type(raw_kind).is_empty() => {
306+
let kind = kind_to_type(raw_kind);
281307
writeln!(file, " pub fn {}(&self) -> Option<{}<'pr>> {{", field.name, kind)?;
282-
writeln!(file, " let node: *mut pm{}_t = unsafe {{ (*self.pointer).{} }};", struct_name(kind), field.name)?;
308+
writeln!(file, " let node: *mut pm{}_t = unsafe {{ (*self.pointer).{} }};", struct_name(&kind), field.name)?;
283309
writeln!(file, " if node.is_null() {{")?;
284310
writeln!(file, " None")?;
285311
writeln!(file, " }} else {{")?;
286312
writeln!(file, " Some({} {{ parser: self.parser, pointer: node, marker: PhantomData }})", kind)?;
287313
writeln!(file, " }}")?;
288314
writeln!(file, " }}")?;
289-
} else {
315+
},
316+
_ => {
290317
writeln!(file, " pub fn {}(&self) -> Option<Node<'pr>> {{", field.name)?;
291318
writeln!(file, " let node: *mut pm_node_t = unsafe {{ (*self.pointer).{} }};", field.name)?;
292319
writeln!(file, " if node.is_null() {{")?;
@@ -295,7 +322,7 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box<d
295322
writeln!(file, " Some(Node::new(self.parser, node))")?;
296323
writeln!(file, " }}")?;
297324
writeln!(file, " }}")?;
298-
}
325+
},
299326
},
300327
NodeFieldType::NodeList => {
301328
writeln!(file, " pub fn {}(&self) -> NodeList<'pr> {{", field.name)?;
@@ -473,16 +500,18 @@ fn write_visit(file: &mut File, config: &Config) -> Result<(), Box<dyn std::erro
473500
for field in &node.fields {
474501
match field.field_type {
475502
NodeFieldType::Node => {
476-
if let Some(NodeFieldKind::Concrete(kind)) = &field.kind {
477-
writeln!(file, " visitor.visit{}(&node.{}());", struct_name(kind), field.name)?;
503+
if let Some(NodeFieldKind::Concrete(raw_kind)) = &field.kind {
504+
let kind = kind_to_type(raw_kind);
505+
writeln!(file, " visitor.visit{}(&node.{}());", struct_name(&kind), field.name)?;
478506
} else {
479507
writeln!(file, " visitor.visit(&node.{}());", field.name)?;
480508
}
481509
},
482510
NodeFieldType::OptionalNode => {
483-
if let Some(NodeFieldKind::Concrete(kind)) = &field.kind {
511+
if let Some(NodeFieldKind::Concrete(raw_kind)) = &field.kind {
512+
let kind = kind_to_type(raw_kind);
484513
writeln!(file, " if let Some(node) = node.{}() {{", field.name)?;
485-
writeln!(file, " visitor.visit{}(&node);", struct_name(kind))?;
514+
writeln!(file, " visitor.visit{}(&node);", struct_name(&kind))?;
486515
writeln!(file, " }}")?;
487516
} else {
488517
writeln!(file, " if let Some(node) = node.{}() {{", field.name)?;
@@ -762,7 +791,7 @@ impl TryInto<i32> for Integer<'_> {{
762791
let length = unsafe {{ (*self.pointer).length }};
763792
764793
if length == 0 {{
765-
i32::try_from(unsafe {{ (*self.pointer).value }}).map_or(Err(()), |value|
794+
i32::try_from(unsafe {{ (*self.pointer).value }}).map_or(Err(()), |value|
766795
if negative {{
767796
Ok(-value)
768797
}} else {{

src/prism.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2989,6 +2989,7 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
29892989

29902990
pm_index_arguments_check(parser, target->arguments, target->block);
29912991

2992+
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
29922993
*node = (pm_index_and_write_node_t) {
29932994
{
29942995
.type = PM_INDEX_AND_WRITE_NODE,
@@ -3004,7 +3005,7 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
30043005
.opening_loc = target->opening_loc,
30053006
.arguments = target->arguments,
30063007
.closing_loc = target->closing_loc,
3007-
.block = target->block,
3008+
.block = (pm_block_argument_node_t *) target->block,
30083009
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
30093010
.value = value
30103011
};
@@ -3064,6 +3065,7 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
30643065

30653066
pm_index_arguments_check(parser, target->arguments, target->block);
30663067

3068+
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
30673069
*node = (pm_index_operator_write_node_t) {
30683070
{
30693071
.type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3079,7 +3081,7 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
30793081
.opening_loc = target->opening_loc,
30803082
.arguments = target->arguments,
30813083
.closing_loc = target->closing_loc,
3082-
.block = target->block,
3084+
.block = (pm_block_argument_node_t *) target->block,
30833085
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
30843086
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
30853087
.value = value
@@ -3141,6 +3143,7 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
31413143

31423144
pm_index_arguments_check(parser, target->arguments, target->block);
31433145

3146+
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
31443147
*node = (pm_index_or_write_node_t) {
31453148
{
31463149
.type = PM_INDEX_OR_WRITE_NODE,
@@ -3156,7 +3159,7 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
31563159
.opening_loc = target->opening_loc,
31573160
.arguments = target->arguments,
31583161
.closing_loc = target->closing_loc,
3159-
.block = target->block,
3162+
.block = (pm_block_argument_node_t *) target->block,
31603163
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
31613164
.value = value
31623165
};
@@ -3209,6 +3212,7 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
32093212

32103213
pm_index_arguments_check(parser, target->arguments, target->block);
32113214

3215+
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
32123216
*node = (pm_index_target_node_t) {
32133217
{
32143218
.type = PM_INDEX_TARGET_NODE,
@@ -3220,7 +3224,7 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
32203224
.opening_loc = target->opening_loc,
32213225
.arguments = target->arguments,
32223226
.closing_loc = target->closing_loc,
3223-
.block = target->block
3227+
.block = (pm_block_argument_node_t *) target->block,
32243228
};
32253229

32263230
// Here we're going to free the target, since it is no longer necessary.
@@ -3235,7 +3239,7 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
32353239
* Allocate and initialize a new CapturePatternNode node.
32363240
*/
32373241
static pm_capture_pattern_node_t *
3238-
pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *target, const pm_token_t *operator) {
3242+
pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
32393243
pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
32403244

32413245
*node = (pm_capture_pattern_node_t) {
@@ -3244,7 +3248,7 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
32443248
.node_id = PM_NODE_IDENTIFY(parser),
32453249
.location = {
32463250
.start = value->location.start,
3247-
.end = target->location.end
3251+
.end = target->base.location.end
32483252
},
32493253
},
32503254
.value = value,
@@ -4036,14 +4040,25 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
40364040
pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
40374041

40384042
pm_node_t *left = nodes->nodes[0];
4043+
assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4044+
pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4045+
40394046
pm_node_t *right;
40404047

40414048
if (nodes->size == 1) {
40424049
right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
40434050
} else {
40444051
right = nodes->nodes[nodes->size - 1];
4052+
assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
40454053
}
40464054

4055+
#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4056+
// FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4057+
// The resulting AST will anyway be ignored, but this file still needs to compile.
4058+
pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4059+
#else
4060+
pm_node_t *right_splat_node = right;
4061+
#endif
40474062
*node = (pm_find_pattern_node_t) {
40484063
{
40494064
.type = PM_FIND_PATTERN_NODE,
@@ -4054,8 +4069,8 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
40544069
},
40554070
},
40564071
.constant = NULL,
4057-
.left = left,
4058-
.right = right,
4072+
.left = left_splat_node,
4073+
.right = right_splat_node,
40594074
.requireds = { 0 },
40604075
.opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
40614076
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -17406,7 +17421,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
1740617421
}
1740717422

1740817423
parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17409-
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(
17424+
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
1741017425
parser,
1741117426
&PM_LOCATION_TOKEN_VALUE(&parser->previous),
1741217427
constant_id,

templates/include/prism/ast.h.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,6 @@ typedef enum pm_<%= flag.human %> {
218218
* to specify that through the environment. It will never be true except for in
219219
* those build systems.
220220
*/
221-
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
221+
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %>
222222

223223
#endif

templates/java/org/prism/Loader.java.erb

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,16 +127,21 @@ public class Loader {
127127
int constantPoolLength = loadVarUInt();
128128
this.constantPool = new ConstantPool(this, source.bytes, constantPoolBufferOffset, constantPoolLength);
129129

130-
Nodes.Node node = loadNode();
130+
Nodes.Node node;
131+
if (errors.length == 0) {
132+
node = loadNode();
131133

132-
int left = constantPoolBufferOffset - buffer.position();
133-
if (left != 0) {
134-
throw new Error("Expected to consume all bytes while deserializing but there were " + left + " bytes left");
135-
}
134+
int left = constantPoolBufferOffset - buffer.position();
135+
if (left != 0) {
136+
throw new Error("Expected to consume all bytes while deserializing but there were " + left + " bytes left");
137+
}
136138

137-
boolean[] newlineMarked = new boolean[1 + source.getLineCount()];
138-
MarkNewlinesVisitor visitor = new MarkNewlinesVisitor(source, newlineMarked);
139-
node.accept(visitor);
139+
boolean[] newlineMarked = new boolean[1 + source.getLineCount()];
140+
MarkNewlinesVisitor visitor = new MarkNewlinesVisitor(source, newlineMarked);
141+
node.accept(visitor);
142+
} else {
143+
node = null;
144+
}
140145

141146
return new ParseResult(node, magicComments, dataLocation, errors, warnings, source);
142147
}

templates/lib/prism/dsl.rb.erb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ module Prism
7070
def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
7171
case field
7272
when Prism::Template::NodeField
73-
if !field.kind?
73+
kind = field.specific_kind || field.union_kind&.first
74+
if kind.nil?
7475
"#{field.name}: default_node(source, location)"
7576
else
76-
kind = field.specific_kind || field.union_kind.first
7777
"#{field.name}: #{kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}(source: source)"
7878
end
7979
when Prism::Template::ConstantField

templates/lib/prism/node.rb.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ module Prism
225225
@flags = flags
226226
<%- node.fields.each do |field| -%>
227227
<%- if Prism::Template::CHECK_FIELD_KIND && field.respond_to?(:check_field_kind) -%>
228-
raise <%= field.name %>.inspect unless <%= field.check_field_kind %>
228+
raise "<%= node.name %>#<%= field.name %> was of unexpected type:\n#{<%= field.name %>.inspect}" unless <%= field.check_field_kind %>
229229
<%- end -%>
230230
@<%= field.name %> = <%= field.name %>
231231
<%- end -%>

templates/rbi/prism/dsl.rbi.erb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ module Prism::DSL
1414
].concat(node.fields.map { |field|
1515
case field
1616
when Prism::Template::NodeField
17-
if !field.kind?
17+
kind = field.specific_kind || field.union_kind&.first
18+
if kind.nil?
1819
[field.name, "default_node(source, location)", field.rbi_class]
1920
else
20-
kind = field.specific_kind || field.union_kind.first
2121
[field.name, %Q{#{kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}(source: source)}, field.rbi_class]
2222
end
2323
when Prism::Template::OptionalNodeField

0 commit comments

Comments
 (0)