Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/compiler/diagnosticMessages.json
Original file line number Diff line number Diff line change
Expand Up @@ -1849,6 +1849,14 @@
"category": "Error",
"code": 1546
},
"Incomplete character class range. Expected a class set character.": {
"category": "Error",
"code": 1547
},
"Missing '{0}' in between class set operands. To form a union of these characters, wrap them in a nested class instead.": {
"category": "Error",
"code": 1548
},
Comment on lines +1852 to +1859
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would appreciate any better suggestions regarding the wording of the messages.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also wonder if old diagnostic messages are amendable. Precisely, shall I alter the message of TS1519 from "Wrap it" to "Wrap them"?


"The types of '{0}' are incompatible between these types.": {
"category": "Error",
Expand Down
171 changes: 108 additions & 63 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3076,7 +3076,7 @@ export function createScanner(

// ClassSetExpression ::= '^'? (ClassUnion | ClassIntersection | ClassSubtraction)
// ClassUnion ::= (ClassSetRange | ClassSetOperand)*
// ClassIntersection ::= ClassSetOperand ('&&' ClassSetOperand)+
// ClassIntersection ::= ClassSetOperand ('&&' [lookahead != '&'] ClassSetOperand)+
// ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+
// ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter
function scanClassSetExpression() {
Expand All @@ -3093,16 +3093,27 @@ export function createScanner(
}
let start = pos;
let operand!: string;
switch (text.slice(pos, pos + 2)) { // TODO: don't use slice
case "--":
case "&&":
error(Diagnostics.Expected_a_class_set_operand);
mayContainStrings = false;
break;
default:
operand = scanClassSetOperand();
break;
if (ch === charCodeChecked(pos + 1) && (ch === CharacterCodes.minus || ch === CharacterCodes.ampersand)) {
// Saw '--' or '&&', the first operand is absent. Only emit an error and don't consume the operator yet
error(Diagnostics.Expected_a_class_set_operand);
mayContainStrings = false;
}
// If we see '-', parse the expression as ClassUnion.
// An exception is made for '-&&', in which case it is more intuitive to parse as ClassIntersection and scan '-' as an invalid operand
else if (
ch === CharacterCodes.minus && (
charCodeChecked(pos + 1) !== CharacterCodes.ampersand || charCodeChecked(pos + 2) !== CharacterCodes.ampersand
)
) {
// '-' can't be an operand in a ClassUnion (a class in Unicode sets mode can't start with '-')
error(Diagnostics.Incomplete_character_class_range_Expected_a_class_set_character);
mayContainStrings = false;
}
Comment on lines +3101 to +3111
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was struggling whether to treat - as an unexpected character ("Unexpected '-'. Did you mean to escape it with backslash?") or an incomplete character class with a zero-width message in cases like /[-a-b-]/v

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And temporarily went with the former.

else {
// Scan the first operand
operand = scanClassSetOperand();
}
// Use the first operator to determine the expression type
switch (charCodeChecked(pos)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it might not have functional difference right now but to future-proof this I'd still include this:

Suggested change
switch (charCodeChecked(pos)) {
switch (ch = charCodeChecked(pos)) {

The reason why this should be reassigned here is that the above call to scanClassSetOperand could move the pos cursor

case CharacterCodes.minus:
if (charCodeChecked(pos + 1) === CharacterCodes.minus) {
Expand All @@ -3125,9 +3136,6 @@ export function createScanner(
mayContainStrings = !isCharacterComplement && expressionMayContainStrings;
return;
}
else {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
}
break;
default:
if (isCharacterComplement && mayContainStrings) {
Expand All @@ -3136,30 +3144,34 @@ export function createScanner(
expressionMayContainStrings = mayContainStrings;
break;
}
// Neither a ClassIntersection nor a ClassSubtraction, scan as ClassUnion
while (true) {
ch = charCodeChecked(pos);
if (ch === CharacterCodes.EOF) {
break;
}
switch (ch) {
case CharacterCodes.minus:
pos++;
ch = charCodeChecked(pos);
ch = charCodeChecked(pos + 1);
if (isClassContentExit(ch)) {
// A class in Unicode sets mode can't end with '-', it must be followed by a class set character
pos++;
error(Diagnostics.Incomplete_character_class_range_Expected_a_class_set_character);
mayContainStrings = !isCharacterComplement && expressionMayContainStrings;
return;
}
if (ch === CharacterCodes.minus) {
// '--', skip for now and emit error later
}
else if (ch === CharacterCodes.ampersand && charCodeChecked(pos + 2) === CharacterCodes.ampersand) {
// '-&&', consume only the '-', skip '&&' for now and emit error later
pos++;
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2);
start = pos - 2;
operand = text.slice(start, pos);
continue;
}
else {
if (!operand) {
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start);
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - start);
}
pos++;
const secondStart = pos;
const secondOperand = scanClassSetOperand();
if (isCharacterComplement && mayContainStrings) {
Expand All @@ -3184,37 +3196,19 @@ export function createScanner(
}
}
break;
case CharacterCodes.ampersand:
start = pos;
pos++;
if (charCodeChecked(pos) === CharacterCodes.ampersand) {
pos++;
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2);
if (charCodeChecked(pos) === CharacterCodes.ampersand) {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
pos++;
}
}
else {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch));
}
operand = text.slice(start, pos);
continue;
}
if (isClassContentExit(charCodeChecked(pos))) {
ch = charCodeChecked(pos);
if (isClassContentExit(ch)) {
break;
}
start = pos;
switch (text.slice(pos, pos + 2)) { // TODO: don't use slice
case "--":
case "&&":
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2);
pos += 2;
operand = text.slice(start, pos);
break;
default:
operand = scanClassSetOperand();
break;
if (ch === charCodeChecked(pos + 1) && (ch === CharacterCodes.minus || ch === CharacterCodes.ampersand)) {
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2);
pos += 2;
operand = text.slice(start, pos);
}
Comment on lines +3205 to +3209
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be even more intuitive if we were to rescan the class set as intersection/subtraction when we hit any -- or &&, but it is infeasible without a two-pass scan, the first of which must not emit errors. Unfortunately, I need to give up as it would overcomplicate the implementation.

else {
operand = scanClassSetOperand();
}
}
mayContainStrings = !isCharacterComplement && expressionMayContainStrings;
Expand All @@ -3230,40 +3224,91 @@ export function createScanner(
// Provide user-friendly diagnostic messages
switch (ch) {
case CharacterCodes.minus:
pos++;
if (charCodeChecked(pos) === CharacterCodes.minus) {
pos++;
if (charCodeChecked(pos + 1) === CharacterCodes.minus) {
if (expressionType !== ClassSetExpressionType.ClassSubtraction) {
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2);
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2);
}
pos += 2;
if (
charCodeChecked(pos) === CharacterCodes.minus
&& charCodeChecked(pos + 1) === CharacterCodes.minus
&& charCodeChecked(pos + 2) !== CharacterCodes.minus
) {
// 4 consecutive '-', assuming two subtraction operators with a missing operand in between
error(Diagnostics.Expected_a_class_set_operand);
continue;
}
else if (
charCodeChecked(pos) === CharacterCodes.ampersand
&& charCodeChecked(pos + 1) === CharacterCodes.ampersand
) {
if (charCodeChecked(pos + 2) === CharacterCodes.ampersand) {
// '--&&&', consume a single '&' as a normal operand
pos++;
expressionMayContainStrings = false;
}
else {
// '--&&', assuming two operators with a missing operand in between.
// We could have emitted an "Expected a class set operand." error here, but doing so will suppress the next
// "Operators must not be mixed within a character class." error since it starts at the same position.
}
continue;
}
}
else if (charCodeChecked(pos + 1) === CharacterCodes.ampersand && charCodeChecked(pos + 2) === CharacterCodes.ampersand) {
// '-&&', consume '-' later as an invalid operand with error
}
else {
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 1, 1);
// a single '-', assuming class set range operator
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 1);
pos++;
}
break;
case CharacterCodes.ampersand:
pos++;
if (charCodeChecked(pos) === CharacterCodes.ampersand) {
pos++;
if (charCodeChecked(pos + 1) === CharacterCodes.ampersand) {
if (expressionType !== ClassSetExpressionType.ClassIntersection) {
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2);
error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2);
}
if (charCodeChecked(pos) === CharacterCodes.ampersand) {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
pos++;
pos += 2;
if (
charCodeChecked(pos) === CharacterCodes.minus
&& charCodeChecked(pos + 1) === CharacterCodes.minus
&& charCodeChecked(pos + 2) !== CharacterCodes.minus
) {
// '&&--', assuming two operators with a missing operand in between.
// We could have emitted an "Expected a class set operand." error here, but doing so will suppress the next
// "Operators must not be mixed within a character class." error since it starts at the same position.
continue;
}
else if (charCodeChecked(pos) === CharacterCodes.ampersand) {
if (
charCodeChecked(pos + 1) === CharacterCodes.ampersand
&& charCodeChecked(pos + 2) !== CharacterCodes.ampersand
) {
// 4 consecutive '&', assuming two intersection operators with a missing operand in between
error(Diagnostics.Expected_a_class_set_operand);
}
else {
// 3 or 5 or more consecutive '&', consume a single '&' with error
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
pos++;
expressionMayContainStrings = false;
}
continue;
}
break;
}
else {
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch));
// The single '&' is just a normal operand, there must be a missing operator before it
}
break;
// falls through
default:
switch (expressionType) {
case ClassSetExpressionType.ClassSubtraction:
error(Diagnostics._0_expected, pos, 0, "--");
error(Diagnostics.Missing_0_in_between_class_set_operands_To_form_a_union_of_these_characters_wrap_them_in_a_nested_class_instead, pos, 0, "--");
break;
case ClassSetExpressionType.ClassIntersection:
error(Diagnostics._0_expected, pos, 0, "&&");
error(Diagnostics.Missing_0_in_between_class_set_operands_To_form_a_union_of_these_characters_wrap_them_in_a_nested_class_instead, pos, 0, "&&");
break;
default:
break;
Expand Down
Loading
Loading