Skip to content

Commit 67baab6

Browse files
atrosinenko authored and adangel committed
[core] Initial implementation of Source Code Minimizer
* Implement simple greedy interactive strategy
* Implement quite conservative whitespace clean-up heuristic
* Optimize greedy strategy
* Refactor SCM
* More optimizations
0 parents  commit 67baab6

36 files changed

+2130
-0
lines changed
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
/**
2+
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3+
*/
4+
5+
package net.sourceforge.pmd.scm;
6+
7+
import java.io.BufferedReader;
8+
import java.io.BufferedWriter;
9+
import java.io.IOException;
10+
import java.nio.charset.Charset;
11+
import java.nio.file.Files;
12+
import java.nio.file.Path;
13+
import java.nio.file.StandardCopyOption;
14+
import java.util.ArrayList;
15+
import java.util.Collection;
16+
import java.util.HashSet;
17+
import java.util.List;
18+
import java.util.Set;
19+
20+
import net.sourceforge.pmd.document.DeleteDocumentOperation;
21+
import net.sourceforge.pmd.document.DocumentFile;
22+
import net.sourceforge.pmd.document.DocumentOperationsApplierForNonOverlappingRegions;
23+
import net.sourceforge.pmd.lang.Parser;
24+
import net.sourceforge.pmd.lang.ast.Node;
25+
26+
/**
27+
* A class for generating source files (as a plain text) from the <b>subset</b> of the given AST.
28+
*
29+
* The <b>expected</b> invariant is that the following trees should be equal:
30+
* <ul>
31+
* <li>the original tree with every node marked for removal being deleted with its descending nodes</li>
32+
* <li>the result of parsing of the plain-text file obtained from the corresponding subset</li>
33+
* </ul>
34+
*
35+
* In other words, for original source <code>TEXT</code> and <code>NODES</code> being a subset of all its AST nodes,
36+
* <code>parse(cut(TEXT, NODES)) == drop-recursively(parse(TEXT), NODES)</code>.
37+
*
38+
* This requirement can be slightly relaxed (such as not requiring presence of nodes that became empty).
39+
*
40+
* Please note, that this operation is <b>not required</b> to somehow retain formatting or create
41+
* nicely formatted files.
42+
*/
43+
public class ASTCutter implements AutoCloseable {
44+
private static final String WHITESPACE_CHARS = " \t";
45+
46+
private final Path lastCommitted = Files.createTempFile("pmd-", ".tmp");
47+
private final Parser parser;
48+
private final Charset charset;
49+
50+
private final Path scratchFile;
51+
private Node currentRoot;
52+
private final Set<Node> currentDocumentNodes = new HashSet<>();
53+
54+
/**
55+
* Create ASTCutter instance
56+
* @param parser parser for the original and intermediate source files
57+
* @param charset charset of source to be cut
58+
* @param scratchFile file to be modified in-place
59+
*/
60+
public ASTCutter(Parser parser, Charset charset, Path scratchFile) throws IOException {
61+
this.parser = parser;
62+
this.charset = charset;
63+
this.scratchFile = scratchFile;
64+
}
65+
66+
public Path getScratchFile() {
67+
return scratchFile;
68+
}
69+
70+
/**
71+
* Converts list of AST {@link Node}s to be cut off into List of {@link DeleteDocumentOperation}s dealing with
72+
* the plain text file representation.
73+
*
74+
* @param treeRoot the root of AST corresponding to the file being processed
75+
* @param deletedNodes the nodes marked for removal (all elements are expected to be accessible from the <code>treeRoot</code>)
76+
* @return a list of non-overlapping operations that, being applied on the file parsed as <code>treeRoot</code>,
77+
* would generate a file that is parsed to <code>treeRoot</code> with all marked codes being cut off recursively.
78+
*/
79+
private List<DeleteDocumentOperation> calculateTreeCutting(Node treeRoot, Collection<Node> deletedNodes) {
80+
ArrayList<DeleteDocumentOperation> result = new ArrayList<>();
81+
calculateTreeCutting(result, treeRoot, new HashSet<>(deletedNodes));
82+
return result;
83+
}
84+
85+
private void calculateTreeCutting(List<DeleteDocumentOperation> result, Node node, Set<Node> deletedNodes) {
86+
if (deletedNodes.contains(node)) {
87+
// not recursing, deleting the whole range
88+
result.add(new DeleteDocumentOperation(
89+
node.getBeginLine() - 1, node.getEndLine() - 1,
90+
node.getBeginColumn() - 1, node.getEndColumn()));
91+
} else {
92+
for (int i = 0; i < node.jjtGetNumChildren(); ++i) {
93+
calculateTreeCutting(result, node.jjtGetChild(i), deletedNodes);
94+
}
95+
}
96+
}
97+
98+
/**
99+
* Performs some conservative trimming of large parts of source code
100+
* not belonging to the AST (such as block comments).
101+
*/
102+
private List<DeleteDocumentOperation> calculateTreeHolesTrimming() throws IOException {
103+
List<DeleteDocumentOperation> result = new ArrayList<>();
104+
List<String> lines = Files.readAllLines(lastCommitted, charset);
105+
Node rootNode = load(lastCommitted);
106+
calculateTreeHolesTrimming(result, lines, rootNode, -1, 0, false);
107+
return result;
108+
}
109+
110+
private void calculateTreeHolesTrimming(List<DeleteDocumentOperation> result, List<String> lines, Node node, final int prevEndLine, final int prevEndColumn, final boolean wasJustTrimmed) {
111+
final int curBeginLine = node.getBeginLine() - 1;
112+
final int curBeginColumn = node.getBeginColumn() - 1;
113+
114+
boolean wasTrimmedHere = false;
115+
116+
if (!wasJustTrimmed && prevEndLine < curBeginLine - 1) {
117+
// retain whitespace indentation to the left
118+
String curLine = lines.get(curBeginLine);
119+
int endDeleteLine = curBeginLine;
120+
int endDeleteColumn = curBeginColumn - 1;
121+
for (; endDeleteColumn >= 0 && WHITESPACE_CHARS.indexOf(curLine.charAt(endDeleteColumn)) != -1; --endDeleteColumn) {
122+
// nothing else
123+
}
124+
// are we retaining the whole current line (it is likely)
125+
if (endDeleteColumn == -1 && endDeleteLine > 0) {
126+
endDeleteLine -= 1;
127+
endDeleteColumn = lines.get(endDeleteLine).length();
128+
} else {
129+
endDeleteColumn += 1;
130+
}
131+
132+
// check that end line of previous Node does not contain non-whitespace after end column
133+
boolean okToTrim = true;
134+
String prevLine = prevEndLine == -1 ? "" : lines.get(prevEndLine);
135+
for (int ind = prevEndColumn + 1; ind < prevLine.length(); ++ind) {
136+
if (WHITESPACE_CHARS.indexOf(prevLine.charAt(ind)) == -1) {
137+
okToTrim = false;
138+
break;
139+
}
140+
}
141+
if (okToTrim) {
142+
result.add(new DeleteDocumentOperation(prevEndLine + 1, endDeleteLine, 0, endDeleteColumn));
143+
wasTrimmedHere = true;
144+
}
145+
}
146+
147+
int prevChildEndLine = prevEndLine;
148+
int prevChildEndColumn = prevEndColumn;
149+
150+
for (int childInd = 0; childInd < node.jjtGetNumChildren(); ++childInd) {
151+
final Node child = node.jjtGetChild(childInd);
152+
153+
calculateTreeHolesTrimming(result, lines, child, prevChildEndLine, prevChildEndColumn, childInd == 0 && (wasTrimmedHere || wasJustTrimmed));
154+
155+
prevChildEndLine = child.getEndLine() - 1;
156+
prevChildEndColumn = child.getEndColumn() - 1;
157+
}
158+
}
159+
160+
/**
161+
* Checks that the input string contains only "allowed" chars
162+
*
163+
* @param text A string to check
164+
* @param chars A string containing chars the checked string should be comprised of
165+
*/
166+
private boolean allCharsFrom(String text, String chars) {
167+
for (int i = 0; i < text.length(); ++i) {
168+
if (chars.indexOf(text.charAt(i)) == -1) {
169+
return false;
170+
}
171+
}
172+
return true;
173+
}
174+
175+
/**
176+
* Trims lines of scratch file that contain only whitespace characters.
177+
*/
178+
private void trimEmptyLinesInPlace(Path trimmedFile) throws IOException {
179+
List<String> lines = Files.readAllLines(trimmedFile, charset);
180+
try (BufferedWriter writer = Files.newBufferedWriter(trimmedFile, charset)) {
181+
for (String line : lines) {
182+
if (!allCharsFrom(line, WHITESPACE_CHARS)) {
183+
writer.write(line);
184+
writer.write('\n');
185+
}
186+
}
187+
}
188+
}
189+
190+
/**
191+
* Populates set of nodes of current document with nodes from this subtree.
192+
*/
193+
private void collectAllNodes(Node subtree) {
194+
currentDocumentNodes.add(subtree);
195+
for (int i = 0; i < subtree.jjtGetNumChildren(); ++i) {
196+
collectAllNodes(subtree.jjtGetChild(i));
197+
}
198+
}
199+
200+
/**
201+
* Loads the specified file with the parser, does not change ASTCutter state.
202+
*/
203+
private Node load(Path from) throws IOException {
204+
try (BufferedReader reader = Files.newBufferedReader(from, charset)) {
205+
return parser.parse(from.toString(), reader);
206+
}
207+
}
208+
209+
/**
210+
* Accepts the last written file state as a new intermediate state.
211+
*
212+
* Please note, this does not anyhow relate to committing files under version control, if any.
213+
*
214+
* @return The root node of the "new current" source state
215+
*/
216+
public Node commitChange() throws IOException {
217+
currentRoot = load(scratchFile);
218+
// if not thrown, then ...
219+
Files.copy(scratchFile, lastCommitted, StandardCopyOption.REPLACE_EXISTING);
220+
221+
currentDocumentNodes.clear();
222+
collectAllNodes(currentRoot);
223+
224+
return currentRoot;
225+
}
226+
227+
/**
228+
* Rolls back intermediate file to the last <i>committed</i> state.
229+
*/
230+
public void rollbackChange() throws IOException {
231+
Files.copy(lastCommitted, scratchFile, StandardCopyOption.REPLACE_EXISTING);
232+
}
233+
234+
private void deleteRegions(List<DeleteDocumentOperation> operations) throws IOException {
235+
try (DocumentFile document = new DocumentFile(scratchFile.toFile(), charset)) {
236+
DocumentOperationsApplierForNonOverlappingRegions applier = new DocumentOperationsApplierForNonOverlappingRegions(document);
237+
for (DeleteDocumentOperation operation : operations) {
238+
applier.addDocumentOperation(operation);
239+
}
240+
applier.apply();
241+
}
242+
}
243+
244+
/**
245+
* Rolls back intermediate file, then tries to trim it once again.
246+
*
247+
* @param nodesToRemove nodes that have to be dropped from the resulting file together with their descendants.
248+
* They should be accessible from the root returned by the last <code>commitChange</code> call!
249+
*/
250+
public void writeTrimmedSource(Collection<Node> nodesToRemove) throws IOException {
251+
rollbackChange();
252+
253+
assert currentDocumentNodes.containsAll(nodesToRemove);
254+
255+
deleteRegions(calculateTreeCutting(currentRoot, nodesToRemove));
256+
}
257+
258+
public void writeCleanedUpSource() throws IOException {
259+
rollbackChange();
260+
deleteRegions(calculateTreeHolesTrimming());
261+
}
262+
263+
public void writeWithoutEmptyLines() throws IOException {
264+
rollbackChange();
265+
trimEmptyLinesInPlace(scratchFile);
266+
}
267+
268+
@Override
269+
public void close() throws Exception {
270+
Files.delete(lastCommitted);
271+
}
272+
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/**
2+
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3+
*/
4+
5+
package net.sourceforge.pmd.scm;
6+
7+
import java.util.ArrayList;
8+
import java.util.Collections;
9+
import java.util.LinkedHashMap;
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.Set;
13+
14+
import net.sourceforge.pmd.lang.LanguageVersion;
15+
import net.sourceforge.pmd.lang.Parser;
16+
import net.sourceforge.pmd.lang.ParserOptions;
17+
import net.sourceforge.pmd.lang.ast.Node;
18+
import net.sourceforge.pmd.scm.invariants.DummyInvariant;
19+
import net.sourceforge.pmd.scm.invariants.ExitCodeInvariant;
20+
import net.sourceforge.pmd.scm.invariants.InvariantConfiguration;
21+
import net.sourceforge.pmd.scm.invariants.InvariantConfigurationFactory;
22+
import net.sourceforge.pmd.scm.invariants.PrintedMessageInvariant;
23+
import net.sourceforge.pmd.scm.strategies.GreedyStrategy;
24+
import net.sourceforge.pmd.scm.strategies.MinimizationStrategyConfiguration;
25+
import net.sourceforge.pmd.scm.strategies.MinimizationStrategyConfigurationFactory;
26+
import net.sourceforge.pmd.scm.strategies.XPathStrategy;
27+
28+
public abstract class BaseMinimizerLanguageModule implements Language, NodeInformationProvider {
29+
private final net.sourceforge.pmd.lang.Language pmdLanguage;
30+
private final Map<String, MinimizationStrategyConfigurationFactory> strategies = new LinkedHashMap<>();
31+
private final Map<String, InvariantConfigurationFactory> invariantCheckers = new LinkedHashMap<>();
32+
33+
BaseMinimizerLanguageModule(net.sourceforge.pmd.lang.Language pmdLanguage) {
34+
this.pmdLanguage = pmdLanguage;
35+
addInvariant(DummyInvariant.FACTORY);
36+
addInvariant(ExitCodeInvariant.FACTORY);
37+
addInvariant(PrintedMessageInvariant.FACTORY);
38+
addStrategy(XPathStrategy.FACTORY);
39+
addStrategy(GreedyStrategy.FACTORY);
40+
}
41+
42+
protected void addStrategy(MinimizationStrategyConfigurationFactory factory) {
43+
strategies.put(factory.getName(), factory);
44+
}
45+
46+
protected void addInvariant(InvariantConfigurationFactory factory) {
47+
invariantCheckers.put(factory.getName(), factory);
48+
}
49+
50+
@Override
51+
public String getTerseName() {
52+
return pmdLanguage.getTerseName();
53+
}
54+
55+
@Override
56+
public List<String> getStrategyNames() {
57+
return new ArrayList<>(strategies.keySet());
58+
}
59+
60+
@Override
61+
public MinimizationStrategyConfiguration createStrategyConfiguration(String name) {
62+
MinimizationStrategyConfigurationFactory factory = strategies.get(name);
63+
return factory == null ? null : factory.createConfiguration();
64+
}
65+
66+
@Override
67+
public List<String> getInvariantNames() {
68+
return new ArrayList<>(invariantCheckers.keySet());
69+
}
70+
71+
@Override
72+
public InvariantConfiguration createInvariantConfiguration(String name) {
73+
InvariantConfigurationFactory factory = invariantCheckers.get(name);
74+
return factory == null ? null : factory.createConfiguration();
75+
}
76+
77+
@Override
78+
public List<String> getLanguageVersions() {
79+
List<String> result = new ArrayList<>();
80+
for (LanguageVersion version : pmdLanguage.getVersions()) {
81+
result.add(version.getVersion());
82+
}
83+
return result;
84+
}
85+
86+
@Override
87+
public String getDefaultLanguageVersion() {
88+
return pmdLanguage.getDefaultVersion().getVersion();
89+
}
90+
91+
@Override
92+
public NodeInformationProvider getNodeInformationProvider() {
93+
return this;
94+
}
95+
96+
@Override
97+
public Parser getParser(String languageVersion) {
98+
for (LanguageVersion version : pmdLanguage.getVersions()) {
99+
if (version.getVersion().equals(languageVersion)) {
100+
ParserOptions opts = version.getLanguageVersionHandler().getDefaultParserOptions();
101+
return version.getLanguageVersionHandler().getParser(opts);
102+
}
103+
}
104+
return null;
105+
}
106+
107+
public Parser getDefaultParser() {
108+
return getParser(getDefaultLanguageVersion());
109+
}
110+
111+
@Override
112+
public Set<Node> getDirectDependencies(Node node) {
113+
// no need to calculate dependencies since there are no dependencies implemented at all, by default
114+
return Collections.EMPTY_SET;
115+
}
116+
117+
@Override
118+
public Set<Node> getDirectlyDependingNodes(Node node) {
119+
// no need to calculate dependencies since there are no dependencies implemented at all, by default
120+
return Collections.EMPTY_SET;
121+
}
122+
}

0 commit comments

Comments
 (0)