Skip to content

Commit 42fe00b

Browse files
authored
Merge pull request #211 from TysonAndre/optimize-getDiagnostics-v2
Optimize getDiagnostics for speed
2 parents 0e936a1 + b6fb023 commit 42fe00b

File tree

6 files changed

+231
-100
lines changed

6 files changed

+231
-100
lines changed

experiments/BenchGetDiagnostics.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?php
2+
// Autoload required classes
3+
require dirname(__DIR__) . "/vendor/autoload.php";
4+
5+
use Microsoft\PhpParser\{DiagnosticsProvider, Node, Parser, PositionUtilities};
6+
7+
8+
const ITERATIONS = 100;
9+
// Return and print an AST from string contents
10+
$main = function() {
11+
// Instantiate new parser instance
12+
// TODO: Multiple source files to be realistic.
13+
$parser = new Parser();
14+
$t0 = microtime(true);
15+
$astNode = $parser->parseSourceFile(file_get_contents(__DIR__ . '/../src/Parser.php'));
16+
$t1 = microtime(true);
17+
for ($i = 0; $i < ITERATIONS; $i++) {
18+
$diagnostics = DiagnosticsProvider::getDiagnostics($astNode);
19+
}
20+
$t2 = microtime(true);
21+
printf("Average time to generate diagnostics: %.6f\n", ($t2 - $t1) / ITERATIONS);
22+
printf("time to parse: %.6f\n", ($t1 - $t0));
23+
global $__counts;
24+
var_export($__counts);
25+
};
26+
$main();

src/DiagnosticsProvider.php

Lines changed: 57 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -10,125 +10,79 @@
1010

1111
class DiagnosticsProvider {
1212

13+
/**
14+
* @var string[] maps the token kind to the corresponding name
15+
*/
1316
private static $tokenKindToText;
1417

18+
/**
19+
* @param int $kind (must be a valid token kind)
20+
* @return string
21+
*/
22+
public static function getTextForTokenKind($kind) {
23+
return self::$tokenKindToText[$kind];
24+
}
25+
26+
/**
27+
* This is called when this class is loaded, at the bottom of this file.
28+
* @return void
29+
*/
30+
public static function initTokenKindToText() {
31+
self::$tokenKindToText = \array_flip(\array_merge(
32+
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
33+
TokenStringMaps::KEYWORDS,
34+
TokenStringMaps::RESERVED_WORDS
35+
));
36+
}
37+
1538
/**
1639
* Returns the diagnostic for $node, or null.
17-
* @param \Microsoft\PhpParser\Node $node
40+
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
1841
* @return Diagnostic|null
1942
*/
2043
public static function checkDiagnostics($node) {
21-
if (!isset(self::$tokenKindToText)) {
22-
self::$tokenKindToText = \array_flip(\array_merge(
23-
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
24-
TokenStringMaps::KEYWORDS,
25-
TokenStringMaps::RESERVED_WORDS
26-
));
44+
if ($node instanceof Token) {
45+
if (\get_class($node) === Token::class) {
46+
return null;
47+
}
48+
return self::checkDiagnosticForUnexpectedToken($node);
2749
}
2850

29-
if ($node instanceof SkippedToken) {
51+
if ($node instanceof Node) {
52+
return $node->getDiagnosticForNode();
53+
}
54+
return null;
55+
}
56+
57+
/**
58+
* @param Token $token
59+
* @return Diagnostic|null
60+
*/
61+
private static function checkDiagnosticForUnexpectedToken($token) {
62+
if ($token instanceof SkippedToken) {
3063
// TODO - consider also attaching parse context information to skipped tokens
3164
// this would allow us to provide more helpful error messages that inform users what to do
3265
// about the problem rather than simply pointing out the mistake.
3366
return new Diagnostic(
3467
DiagnosticKind::Error,
3568
"Unexpected '" .
36-
(self::$tokenKindToText[$node->kind]
37-
?? Token::getTokenKindNameFromValue($node->kind)) .
69+
(self::$tokenKindToText[$token->kind]
70+
?? Token::getTokenKindNameFromValue($token->kind)) .
3871
"'",
39-
$node->start,
40-
$node->getEndPosition() - $node->start
72+
$token->start,
73+
$token->getEndPosition() - $token->start
4174
);
42-
} elseif ($node instanceof MissingToken) {
75+
} elseif ($token instanceof MissingToken) {
4376
return new Diagnostic(
4477
DiagnosticKind::Error,
4578
"'" .
46-
(self::$tokenKindToText[$node->kind]
47-
?? Token::getTokenKindNameFromValue($node->kind)) .
79+
(self::$tokenKindToText[$token->kind]
80+
?? Token::getTokenKindNameFromValue($token->kind)) .
4881
"' expected.",
49-
$node->start,
50-
$node->getEndPosition() - $node->start
82+
$token->start,
83+
$token->getEndPosition() - $token->start
5184
);
5285
}
53-
54-
if ($node === null || $node instanceof Token) {
55-
return null;
56-
}
57-
58-
if ($node instanceof Node) {
59-
if ($node instanceof Node\MethodDeclaration) {
60-
foreach ($node->modifiers as $modifier) {
61-
if ($modifier->kind === TokenKind::VarKeyword) {
62-
return new Diagnostic(
63-
DiagnosticKind::Error,
64-
"Unexpected modifier '" . self::$tokenKindToText[$modifier->kind] . "'",
65-
$modifier->start,
66-
$modifier->length
67-
);
68-
}
69-
}
70-
}
71-
elseif ($node instanceof Node\Statement\NamespaceUseDeclaration) {
72-
if (
73-
$node->useClauses != null
74-
&& \count($node->useClauses->children) > 1
75-
) {
76-
foreach ($node->useClauses->children as $useClause) {
77-
if($useClause instanceof Node\NamespaceUseClause && !is_null($useClause->openBrace)) {
78-
return new Diagnostic(
79-
DiagnosticKind::Error,
80-
"; expected.",
81-
$useClause->getEndPosition(),
82-
1
83-
);
84-
}
85-
}
86-
}
87-
}
88-
else if ($node instanceof Node\Statement\BreakOrContinueStatement) {
89-
if ($node->breakoutLevel === null) {
90-
return null;
91-
}
92-
93-
$breakoutLevel = $node->breakoutLevel;
94-
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
95-
$breakoutLevel = $breakoutLevel->expression;
96-
}
97-
98-
if (
99-
$breakoutLevel instanceof Node\NumericLiteral
100-
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
101-
) {
102-
$literalString = $breakoutLevel->getText();
103-
$firstTwoChars = \substr($literalString, 0, 2);
104-
105-
if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
106-
if (\bindec(\substr($literalString, 2)) > 0) {
107-
return null;
108-
}
109-
}
110-
else if (\intval($literalString, 0) > 0) {
111-
return null;
112-
}
113-
}
114-
115-
if ($breakoutLevel instanceof Token) {
116-
$start = $breakoutLevel->getStartPosition();
117-
}
118-
else {
119-
$start = $breakoutLevel->getStart();
120-
}
121-
$end = $breakoutLevel->getEndPosition();
122-
123-
return new Diagnostic(
124-
DiagnosticKind::Error,
125-
"Positive integer literal expected.",
126-
$start,
127-
$end - $start
128-
);
129-
}
130-
}
131-
return null;
13286
}
13387

13488
/**
@@ -139,12 +93,17 @@ public static function checkDiagnostics($node) {
13993
public static function getDiagnostics(Node $n) : array {
14094
$diagnostics = [];
14195

142-
foreach ($n->getDescendantNodesAndTokens() as $node) {
96+
/**
97+
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
98+
*/
99+
$n->walkDescendantNodesAndTokens(function($node) use (&$diagnostics) {
143100
if (($diagnostic = self::checkDiagnostics($node)) !== null) {
144101
$diagnostics[] = $diagnostic;
145102
}
146-
}
103+
});
147104

148105
return $diagnostics;
149106
}
150107
}
108+
109+
DiagnosticsProvider::initTokenKindToText();

src/Node.php

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,55 @@ public function getDescendantNodesAndTokens(callable $shouldDescendIntoChildrenF
158158
// TODO - write unit tests to prove invariants
159159
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
160160
foreach ($this->getChildNodesAndTokens() as $child) {
161+
// Check possible types of $child, most frequent first
161162
if ($child instanceof Node) {
162163
yield $child;
163-
if ($shouldDescendIntoChildrenFn == null || $shouldDescendIntoChildrenFn($child)) {
164-
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
164+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
165+
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
165166
}
166167
} elseif ($child instanceof Token) {
167168
yield $child;
168169
}
169170
}
170171
}
171172

173+
/**
174+
* Iterate over all descendant Nodes and Tokens, calling $callback.
175+
* This can often be faster than getDescendantNodesAndTokens
176+
* if you just need to call something and don't need a generator.
177+
*
178+
* @param callable $callback a callback that accepts Node|Token
179+
* @param callable|null $shouldDescendIntoChildrenFn
180+
* @return void
181+
*/
182+
public function walkDescendantNodesAndTokens(callable $callback, callable $shouldDescendIntoChildrenFn = null) {
183+
// TODO - write unit tests to prove invariants
184+
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
185+
foreach (static::CHILD_NAMES as $name) {
186+
$child = $this->$name;
187+
// Check possible types of $child, most frequent first
188+
if ($child instanceof Token) {
189+
$callback($child);
190+
} elseif ($child instanceof Node) {
191+
$callback($child);
192+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
193+
$child->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
194+
}
195+
} elseif (\is_array($child)) {
196+
foreach ($child as $childElement) {
197+
if ($childElement instanceof Token) {
198+
$callback($childElement);
199+
} elseif ($childElement instanceof Node) {
200+
$callback($childElement);
201+
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($childElement)) {
202+
$childElement->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
203+
}
204+
}
205+
}
206+
}
207+
}
208+
}
209+
172210
/**
173211
* Gets a generator containing all descendant Nodes.
174212
* @param callable|null $shouldDescendIntoChildrenFn
@@ -639,4 +677,13 @@ private function addToImportTable($alias, $functionOrConst, $namespaceNameParts,
639677
}
640678
return array($namespaceImportTable, $functionImportTable, $constImportTable);
641679
}
680+
681+
/**
682+
* This is overridden in subclasses
683+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
684+
* @internal
685+
*/
686+
public function getDiagnosticForNode() {
687+
return null;
688+
}
642689
}

src/Node/MethodDeclaration.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
namespace Microsoft\PhpParser\Node;
88

9+
use Microsoft\PhpParser\Diagnostic;
10+
use Microsoft\PhpParser\DiagnosticKind;
11+
use Microsoft\PhpParser\DiagnosticsProvider;
912
use Microsoft\PhpParser\FunctionLike;
1013
use Microsoft\PhpParser\Node;
1114
use Microsoft\PhpParser\Token;
@@ -52,4 +55,22 @@ public function isStatic() : bool {
5255
public function getName() {
5356
return $this->name->getText($this->getFileContents());
5457
}
58+
59+
/**
60+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
61+
* @internal
62+
* @override
63+
*/
64+
public function getDiagnosticForNode() {
65+
foreach ($this->modifiers as $modifier) {
66+
if ($modifier->kind === TokenKind::VarKeyword) {
67+
return new Diagnostic(
68+
DiagnosticKind::Error,
69+
"Unexpected modifier '" . DiagnosticsProvider::getTextForTokenKind($modifier->kind) . "'",
70+
$modifier->start,
71+
$modifier->length
72+
);
73+
}
74+
}
75+
}
5576
}

src/Node/Statement/BreakOrContinueStatement.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66

77
namespace Microsoft\PhpParser\Node\Statement;
88

9+
use Microsoft\PhpParser\Diagnostic;
10+
use Microsoft\PhpParser\DiagnosticKind;
11+
use Microsoft\PhpParser\Node;
912
use Microsoft\PhpParser\Node\Expression;
1013
use Microsoft\PhpParser\Node\StatementNode;
1114
use Microsoft\PhpParser\Token;
15+
use Microsoft\PhpParser\TokenKind;
1216

1317
class BreakOrContinueStatement extends StatementNode {
1418
/** @var Token */
@@ -23,4 +27,52 @@ class BreakOrContinueStatement extends StatementNode {
2327
'breakoutLevel',
2428
'semicolon'
2529
];
30+
31+
/**
32+
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
33+
* @internal
34+
* @override
35+
*/
36+
public function getDiagnosticForNode() {
37+
if ($this->breakoutLevel === null) {
38+
return null;
39+
}
40+
41+
$breakoutLevel = $this->breakoutLevel;
42+
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
43+
$breakoutLevel = $breakoutLevel->expression;
44+
}
45+
46+
if (
47+
$breakoutLevel instanceof Node\NumericLiteral
48+
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
49+
) {
50+
$literalString = $breakoutLevel->getText();
51+
$firstTwoChars = \substr($literalString, 0, 2);
52+
53+
if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
54+
if (\bindec(\substr($literalString, 2)) > 0) {
55+
return null;
56+
}
57+
}
58+
else if (\intval($literalString, 0) > 0) {
59+
return null;
60+
}
61+
}
62+
63+
if ($breakoutLevel instanceof Token) {
64+
$start = $breakoutLevel->getStartPosition();
65+
}
66+
else {
67+
$start = $breakoutLevel->getStart();
68+
}
69+
$end = $breakoutLevel->getEndPosition();
70+
71+
return new Diagnostic(
72+
DiagnosticKind::Error,
73+
"Positive integer literal expected.",
74+
$start,
75+
$end - $start
76+
);
77+
}
2678
}

0 commit comments

Comments
 (0)