Skip to content

Commit c494890

Browse files
committed
Add basic static analysis for PDF content streams
This is a basic proof of concept. It can currently report the following issues: * An array, dictionary, or string object was not closed. * Unnecessary whitespace at the end of lines. * Unexpected tokens. * Wrong operand count or type for path construction operators.
1 parent a81f6cc commit c494890

File tree

6 files changed

+543
-18
lines changed

6 files changed

+543
-18
lines changed

src/main/java/com/itextpdf/rups/model/contentstream/ParseTreeNode.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,50 @@ public int getTextCount() {
269269
return textCount;
270270
}
271271

272+
/**
273+
* Returns the start offset for the node. If this is a primitive node,
274+
* then it is equivalent to calling {@link #getTextOffset()}. But if it is
275+
* a composite node, it returns the text offset of the leftmost
276+
* primitive descendant.
277+
*
278+
* @return The start offset for the node.
279+
*/
280+
public int getStartOffset() {
281+
if (textArray != null) {
282+
return textOffset;
283+
}
284+
ParseTreeNode child = getFirstChild();
285+
while (child != null) {
286+
if (child.textArray != null) {
287+
return child.textOffset;
288+
}
289+
child = child.getFirstChild();
290+
}
291+
return 0;
292+
}
293+
294+
/**
295+
* Returns the end offset for the node. If this is a primitive node, then
296+
* it is equivalent to summing {@link #getTextOffset()} and
297+
* {@link #getTextCount()}. But if it is a composite node, it returns the
298+
* end offset of the rightmost primitive descendant.
299+
*
300+
* @return The end offset for the node.
301+
*/
302+
public int getEndOffset() {
303+
if (textArray != null) {
304+
return textOffset + textCount;
305+
}
306+
ParseTreeNode child = getLastChild();
307+
while (child != null) {
308+
if (child.textArray != null) {
309+
return child.textOffset + child.textCount;
310+
}
311+
child = child.getLastChild();
312+
}
313+
return 0;
314+
}
315+
272316
/**
273317
* Returns the first child of a node, or null, if it is a leaf.
274318
*

src/main/java/com/itextpdf/rups/view/Language.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,21 @@ public enum Language {
197197
PAGE_NUMBER,
198198
PAGES,
199199
PAGES_TABLE_OBJECT,
200+
201+
PARSER_NOT_CLOSED_ARRAY,
202+
PARSER_NOT_CLOSED_DICTIONARY,
203+
PARSER_NOT_CLOSED_STRING_HEX,
204+
PARSER_NOT_CLOSED_STRING_LITERAL,
205+
PARSER_OPERAND_TYPES_C,
206+
PARSER_OPERAND_TYPES_H,
207+
PARSER_OPERAND_TYPES_L,
208+
PARSER_OPERAND_TYPES_M,
209+
PARSER_OPERAND_TYPES_RE,
210+
PARSER_OPERAND_TYPES_V,
211+
PARSER_OPERAND_TYPES_Y,
212+
PARSER_UNEXPECTED_TOKEN,
213+
PARSER_WASTEFUL_WHITESPACE,
214+
200215
PDF_READING,
201216
PDF_OBJECT_TREE,
202217
PLAINTEXT,

src/main/java/com/itextpdf/rups/view/itext/StreamTextEditorPane.java

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ This file is part of the iText (R) project.
5858
import com.itextpdf.rups.view.contextmenu.StreamPanelContextMenu;
5959
import com.itextpdf.rups.view.itext.editor.Latin1Filter;
6060
import com.itextpdf.rups.view.itext.editor.PdfFoldParser;
61+
import com.itextpdf.rups.view.itext.editor.PdfParser;
6162
import com.itextpdf.rups.view.itext.editor.PdfTokenMaker;
6263
import com.itextpdf.rups.view.itext.editor.PdfTokenPainterFactory;
6364
import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode;
@@ -73,6 +74,7 @@ This file is part of the iText (R) project.
7374
import org.fife.ui.rsyntaxtextarea.DefaultTokenPainterFactory;
7475
import org.fife.ui.rsyntaxtextarea.RSyntaxDocument;
7576
import org.fife.ui.rsyntaxtextarea.RSyntaxTextArea;
77+
import org.fife.ui.rsyntaxtextarea.SyntaxConstants;
7678
import org.fife.ui.rsyntaxtextarea.TokenMakerFactory;
7779
import org.fife.ui.rsyntaxtextarea.folding.FoldParserManager;
7880
import org.fife.ui.rtextarea.ExpandedFoldRenderStrategy;
@@ -82,11 +84,7 @@ public final class StreamTextEditorPane extends RTextScrollPane implements IRups
8284
/**
8385
* Syntax style identifier for a PDF content stream.
8486
*/
85-
private static final String MIME_PDF = "application/pdf";
86-
/**
87-
* MIME type for plain text.
88-
*/
89-
private static final String MIME_PLAIN_TEXT = "plain/text";
87+
private static final String SYNTAX_STYLE_PDF = "application/pdf";
9088

9189
/**
9290
* Char buffer with a single LF character.
@@ -115,8 +113,8 @@ public final class StreamTextEditorPane extends RTextScrollPane implements IRups
115113
*/
116114
final AbstractTokenMakerFactory tokenMakerFactory =
117115
(AbstractTokenMakerFactory) TokenMakerFactory.getDefaultInstance();
118-
tokenMakerFactory.putMapping(MIME_PDF, PdfTokenMaker.class.getName());
119-
FoldParserManager.get().addFoldParserMapping(MIME_PDF, new PdfFoldParser());
116+
tokenMakerFactory.putMapping(SYNTAX_STYLE_PDF, PdfTokenMaker.class.getName());
117+
FoldParserManager.get().addFoldParserMapping(SYNTAX_STYLE_PDF, new PdfFoldParser());
120118
/*
121119
* There doesn't seem to be a good way to detect whether you can call
122120
* setData on a PdfStream or not in advance. It cannot be called if a
@@ -187,7 +185,7 @@ public void render(PdfObjectTreeNode target) {
187185
// Assuming that this will stop parsing for a moment...
188186
getTextArea().setVisible(false);
189187
String textToSet;
190-
String mimeToSet;
188+
String styleToSet;
191189
boolean editableToSet;
192190
/*
193191
* TODO: Differentiate between different content. See below.
@@ -207,21 +205,21 @@ public void render(PdfObjectTreeNode target) {
207205
try {
208206
if (isFont(stream) || isImage(stream)) {
209207
textToSet = getText(stream, false);
210-
mimeToSet = MIME_PLAIN_TEXT;
208+
styleToSet = SyntaxConstants.SYNTAX_STYLE_NONE;
211209
editableToSet = false;
212210
} else {
213211
textToSet = prepareContentStreamText(getText(stream, true));
214-
mimeToSet = MIME_PDF;
212+
styleToSet = SYNTAX_STYLE_PDF;
215213
editableToSet = true;
216214
}
217215
setTextEditableRoutine(true);
218216
} catch (RuntimeException e) {
219217
LoggerHelper.error(Language.ERROR_UNEXPECTED_EXCEPTION.getString(), e, getClass());
220218
textToSet = "";
221-
mimeToSet = MIME_PLAIN_TEXT;
219+
styleToSet = SyntaxConstants.SYNTAX_STYLE_NONE;
222220
editableToSet = false;
223221
}
224-
setContentType(mimeToSet);
222+
setContentType(styleToSet);
225223
getTextArea().setText(textToSet);
226224
getTextArea().setCaretPosition(0);
227225
setTextEditableRoutine(editableToSet);
@@ -316,8 +314,8 @@ private void clearPane() {
316314
setTextEditableRoutine(false);
317315
}
318316

319-
private void setContentType(String mime) {
320-
setContentType(getTextArea(), mime);
317+
private void setContentType(String style) {
318+
setContentType(getTextArea(), style);
321319
}
322320

323321
private void setUndoEnabled(boolean enabled) {
@@ -404,7 +402,8 @@ private static RSyntaxTextArea createTextArea() {
404402
* metadata we should just use the regular XML editor available. But
405403
* by default we will just assume a PDF content stream.
406404
*/
407-
setContentType(textArea, MIME_PDF);
405+
setContentType(textArea, SYNTAX_STYLE_PDF);
406+
textArea.addParser(new PdfParser());
408407
// This enables folding of code blocks (like BT/ET blocks)
409408
textArea.setCodeFoldingEnabled(true);
410409
// This will automatically add tabulations, when you enter a new line
@@ -417,15 +416,15 @@ private static RSyntaxTextArea createTextArea() {
417416
return textArea;
418417
}
419418

420-
private static void setContentType(RSyntaxTextArea textArea, String mime) {
421-
if (MIME_PDF.equals(mime)) {
419+
private static void setContentType(RSyntaxTextArea textArea, String style) {
420+
if (SYNTAX_STYLE_PDF.equals(style)) {
422421
getDocument(textArea).setDocumentFilter(new Latin1Filter());
423422
textArea.setTokenPainterFactory(new PdfTokenPainterFactory());
424423
} else {
425424
getDocument(textArea).setDocumentFilter(null);
426425
textArea.setTokenPainterFactory(new DefaultTokenPainterFactory());
427426
}
428-
textArea.setSyntaxEditingStyle(mime);
427+
textArea.setSyntaxEditingStyle(style);
429428
}
430429

431430
private static String getText(PdfStream stream, boolean decoded) {

0 commit comments

Comments
 (0)