Skip to content

Commit a3d309f

Browse files
committed
[RELEASE] iText 7 pdfSweep - 1.1.1
https://github.com/itext/i7j-pdfsweep/releases/tag/1.1.1 * release/1.1.1: [RELEASE] 1.1.1-SNAPSHOT -> 1.1.1; Fixed an issue where the .getFontFromAcroForm() method was called on a PDF without an AcroForm, resulting in a NullPointerException. This is fixed by adding an extra check on the availability of the AcroForm before querying it. Update pom parent with iText 7.0.7 SNAPSHOT; Update copyright header; Avoid unnecessary TJ operator before writing text with ' or ", handle empty TJ array; Process new line operators while text cleaning. Minor changes. Process TJ only with numerical operands. Add a test. Add sources jar; 🔖 1.0.3-SNAPSHOT -> 1.1.1-SNAPSHOT
2 parents fbb2461 + 5677d7d commit a3d309f

20 files changed

+201
-72
lines changed

pom.xml

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>root</artifactId>
8-
<version>7.0.5</version>
8+
<version>7.0.7</version>
99
<relativePath />
1010
</parent>
1111

1212
<artifactId>cleanup</artifactId>
13-
<version>1.1.0</version>
13+
<version>1.1.1</version>
1414

1515
<name>pdfSweep</name>
1616
<url>http://itextpdf.com/</url>
@@ -72,17 +72,17 @@
7272
<artifactId>layout</artifactId>
7373
<version>${itext.version}</version>
7474
</dependency>
75-
<dependency>
76-
<groupId>com.itextpdf</groupId>
77-
<artifactId>pdftest</artifactId>
78-
<version>${itext.version}</version>
79-
<scope>test</scope>
80-
</dependency>
8175
<dependency>
8276
<groupId>org.apache.commons</groupId>
8377
<artifactId>commons-imaging</artifactId>
8478
<version>1.0-SNAPSHOT</version>
8579
</dependency>
80+
<dependency>
81+
<groupId>com.itextpdf</groupId>
82+
<artifactId>pdftest</artifactId>
83+
<version>${itext.version}</version>
84+
<scope>test</scope>
85+
</dependency>
8686
</dependencies>
8787

8888
<build>
@@ -111,12 +111,16 @@
111111
<plugin>
112112
<groupId>org.apache.maven.plugins</groupId>
113113
<artifactId>maven-source-plugin</artifactId>
114-
<version>3.0.0</version>
115-
<configuration>
116-
<excludes>
117-
<exclude>**</exclude>
118-
</excludes>
119-
</configuration>
114+
<version>3.0.1</version>
115+
<executions>
116+
<execution>
117+
<id>attach-sources</id>
118+
<phase>verify</phase>
119+
<goals>
120+
<goal>jar-no-fork</goal>
121+
</goals>
122+
</execution>
123+
</executions>
120124
</plugin>
121125
<plugin>
122126
<groupId>org.apache.maven.plugins</groupId>

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpEventListener.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ List<TextRenderInfo> getEncounteredText() {
8484
if (content.size() == 0) {
8585
throw new PdfException(textDataExpected);
8686
}
87-
8887
ArrayList<TextRenderInfo> text = new ArrayList<>(content.size());
8988
for (IEventData data : content) {
9089
if (data instanceof TextRenderInfo) {

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpProcessor.java

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,37 +48,37 @@ This file is part of the iText (R) project.
4848
import com.itextpdf.kernel.color.Color;
4949
import com.itextpdf.kernel.font.PdfFont;
5050
import com.itextpdf.kernel.geom.BezierCurve;
51+
import com.itextpdf.kernel.geom.IShape;
5152
import com.itextpdf.kernel.geom.Matrix;
5253
import com.itextpdf.kernel.geom.Path;
5354
import com.itextpdf.kernel.geom.Point;
5455
import com.itextpdf.kernel.geom.Rectangle;
55-
import com.itextpdf.kernel.geom.IShape;
5656
import com.itextpdf.kernel.geom.Subpath;
57+
import com.itextpdf.kernel.pdf.PdfArray;
5758
import com.itextpdf.kernel.pdf.PdfDictionary;
59+
import com.itextpdf.kernel.pdf.PdfDocument;
60+
import com.itextpdf.kernel.pdf.PdfLiteral;
61+
import com.itextpdf.kernel.pdf.PdfName;
62+
import com.itextpdf.kernel.pdf.PdfNumber;
63+
import com.itextpdf.kernel.pdf.PdfObject;
5864
import com.itextpdf.kernel.pdf.PdfPage;
65+
import com.itextpdf.kernel.pdf.PdfResources;
66+
import com.itextpdf.kernel.pdf.PdfStream;
67+
import com.itextpdf.kernel.pdf.PdfTextArray;
5968
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
6069
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
6170
import com.itextpdf.kernel.pdf.annot.PdfTextMarkupAnnotation;
6271
import com.itextpdf.kernel.pdf.canvas.CanvasGraphicsState;
6372
import com.itextpdf.kernel.pdf.canvas.CanvasTag;
73+
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
6474
import com.itextpdf.kernel.pdf.canvas.PdfCanvasConstants;
75+
import com.itextpdf.kernel.pdf.canvas.PdfCanvasConstants.FillingRule;
6576
import com.itextpdf.kernel.pdf.canvas.parser.EventType;
77+
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
6678
import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
6779
import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
6880
import com.itextpdf.kernel.pdf.canvas.parser.data.PathRenderInfo;
69-
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
7081
import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
71-
import com.itextpdf.kernel.pdf.PdfArray;
72-
import com.itextpdf.kernel.pdf.PdfDocument;
73-
import com.itextpdf.kernel.pdf.PdfLiteral;
74-
import com.itextpdf.kernel.pdf.PdfName;
75-
import com.itextpdf.kernel.pdf.PdfNumber;
76-
import com.itextpdf.kernel.pdf.PdfObject;
77-
import com.itextpdf.kernel.pdf.PdfResources;
78-
import com.itextpdf.kernel.pdf.PdfStream;
79-
import com.itextpdf.kernel.pdf.PdfTextArray;
80-
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
81-
import com.itextpdf.kernel.pdf.canvas.PdfCanvasConstants.FillingRule;
8282
import com.itextpdf.kernel.pdf.colorspace.PdfShading;
8383
import com.itextpdf.kernel.pdf.tagutils.TagTreePointer;
8484
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
@@ -512,14 +512,22 @@ private void filterContent(String operator, List<PdfObject> operands) {
512512
}
513513

514514
private void cleanText(String operator, List<PdfObject> operands) {
515-
List<TextRenderInfo> textChunks = getEventListener().getEncounteredText();
515+
List<TextRenderInfo> textChunks = null;
516516
PdfArray cleanedText = null;
517517
if ("TJ".equals(operator)) {
518518
PdfArray originalTJ = (PdfArray) operands.get(0);
519+
if (originalTJ.isEmpty()) {
520+
// empty TJ neither shows any text nor affects text positioning
521+
// we can safely ignore it
522+
return;
523+
}
519524
int i = 0; // text chunk index in original TJ
520525
PdfTextArray newTJ = new PdfTextArray();
521526
for (PdfObject e : originalTJ) {
522527
if (e.isString()) {
528+
if (null == textChunks) {
529+
textChunks = ((PdfCleanUpEventListener) getEventListener()).getEncounteredText();
530+
}
523531
PdfArray filteredText = filter.filterText(textChunks.get(i++)).getFilterResult();
524532
newTJ.addAll(filteredText);
525533
} else {
@@ -529,24 +537,34 @@ private void cleanText(String operator, List<PdfObject> operands) {
529537

530538
cleanedText = newTJ;
531539
} else { // if operator is Tj or ' or "
540+
textChunks = ((PdfCleanUpEventListener) getEventListener()).getEncounteredText();
532541
PdfCleanUpFilter.FilterResult<PdfArray> filterResult = filter.filterText(textChunks.get(0));
533542
if (filterResult.isModified()) {
534543
cleanedText = filterResult.getFilterResult();
535544
}
536545
}
537-
538-
TextRenderInfo text = textChunks.get(0); // all text chunks even in case of TJ have the same graphics state
539546
// if text wasn't modified cleanedText is null
540547
if (cleanedText == null || cleanedText.size() != 1 || !cleanedText.get(0).isNumber()) {
548+
if (null == textChunks) {
549+
textChunks = ((PdfCleanUpEventListener) getEventListener()).getEncounteredText();
550+
}
551+
TextRenderInfo text = textChunks.get(0); // all text chunks even in case of TJ have the same graphics state
541552
writeNotAppliedGsParamsForText(text);
542553
beginTextObjectAndOpenNotWrittenTags();
543554

544555
writeNotAppliedTextStateParams(text);
545556
textPositioning.writePositionedText(operator, operands, cleanedText, getCanvas());
546557
} else { // cleaned text is tj array with single number - it means that the whole text chunk was removed
547558
CanvasGraphicsState gs = getCanvas().getGraphicsState();
559+
// process new lines if necessary
560+
if ("'".equals(operator) || "\"".equals(operator)) {
561+
List<PdfObject> newLineList = new ArrayList<>();
562+
newLineList.add(new PdfLiteral("T*"));
563+
textPositioning.appendPositioningOperator("T*", newLineList);
564+
}
548565
textPositioning.appendTjArrayWithSingleNumber(cleanedText, gs.getFontSize(), gs.getHorizontalScaling());
549566
}
567+
550568
}
551569

552570
private void beginTextObjectAndOpenNotWrittenTags() {

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpTool.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,11 +453,17 @@ private void drawRolloverAppearance(PdfCanvas canvas, PdfStream redactRolloverAp
453453
}
454454

455455
private void drawOverlayText(PdfCanvas canvas, String overlayText, Rectangle annotRect, PdfBoolean repeat, PdfString defaultAppearance, int justification) throws IOException {
456-
Map<String, List> parsedDA = parseDAParam(defaultAppearance);
456+
Map<String, List> parsedDA;
457+
try {
458+
parsedDA = parseDAParam(defaultAppearance);
459+
}catch (NullPointerException npe){
460+
throw new PdfException(PdfException.DefaultAppearanceNotFound);
461+
}
457462
PdfFont font;
458463
float fontSize = 12;
459464
List fontArgs = parsedDA.get("Tf");
460-
if (fontArgs != null) {
465+
PdfDictionary formDictionary = pdfDocument.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm);
466+
if (fontArgs != null && formDictionary != null) {
461467
font = getFontFromAcroForm((PdfName) fontArgs.get(0));
462468
fontSize = ((PdfNumber) fontArgs.get(1)).floatValue();
463469
} else {

src/main/java/com/itextpdf/pdfcleanup/TextPositioning.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,7 @@ void appendPositioningOperator(String operator, List<PdfObject> operands) {
9393
return;
9494
}
9595

96-
if (removedTextShift != null) {
97-
removedTextShift = null;
98-
}
96+
removedTextShift = null;
9997

10098
if (prevOperator == null) {
10199
firstPositioningOperands = new ArrayList<>(operands);
@@ -187,20 +185,23 @@ private void writePositioningOperator(PdfCanvas canvas) {
187185
private void writeText(String operator, List<PdfObject> operands, PdfArray cleanedText, PdfCanvas canvas) {
188186
CanvasGraphicsState canvasGs = canvas.getGraphicsState();
189187
boolean newLineShowText = "'".equals(operator) || "\"".equals(operator);
190-
if (newLineShowText && canvasGs.getLeading() != currLeading) {
191-
canvas.setLeading((float) currLeading);
188+
if (newLineShowText) {
189+
if (canvasGs.getLeading() != currLeading) {
190+
canvas.setLeading((float) currLeading);
191+
}
192+
// after new line operator, removed text shift doesn't matter
193+
removedTextShift = null;
192194
}
193-
PdfTextArray tjShiftArray = new PdfTextArray();
195+
PdfTextArray tjShiftArray = null;
194196
if (removedTextShift != null) {
195197
float tjShift = (float) removedTextShift * 1000 / (canvasGs.getFontSize() * canvasGs.getHorizontalScaling() / 100);
198+
tjShiftArray = new PdfTextArray();
196199
tjShiftArray.add(new PdfNumber(tjShift));
197200
}
198201
if (cleanedText != null) {
199202
if (newLineShowText) {
200203
// char spacing and word spacing are set via writeNotAppliedTextStateParams() method
201204
canvas.newlineText();
202-
// after new line operator, removed text shift doesn't matter
203-
removedTextShift = null;
204205
}
205206
if (removedTextShift != null) {
206207
tjShiftArray.addAll(cleanedText);

src/main/java/com/itextpdf/pdfcleanup/autosweep/PdfAutoSweep.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,14 @@ This file is part of the iText (R) project.
4040
For more information, please contact iText Software Corp. at this
4141
4242
*/
43-
/*
44-
* To change this license header, choose License Headers in Project Properties.
45-
* To change this template file, choose Tools | Templates
46-
* and open the template in the editor.
47-
*/
4843
package com.itextpdf.pdfcleanup.autosweep;
4944

5045
import com.itextpdf.kernel.geom.Rectangle;
51-
import com.itextpdf.kernel.pdf.*;
46+
import com.itextpdf.kernel.pdf.PdfArray;
47+
import com.itextpdf.kernel.pdf.PdfDocument;
48+
import com.itextpdf.kernel.pdf.PdfName;
49+
import com.itextpdf.kernel.pdf.PdfPage;
50+
import com.itextpdf.kernel.pdf.PdfString;
5251
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
5352
import com.itextpdf.kernel.pdf.annot.PdfRedactAnnotation;
5453
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
@@ -58,7 +57,12 @@ This file is part of the iText (R) project.
5857
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
5958

6059
import java.io.IOException;
61-
import java.util.*;
60+
import java.util.ArrayList;
61+
import java.util.Comparator;
62+
import java.util.HashSet;
63+
import java.util.List;
64+
import java.util.Random;
65+
import java.util.Set;
6266

6367
/**
6468
* Class that automatically extracts all regions of interest from a given PdfDocument and redacts them.

src/test/java/com/itextpdf/pdfcleanup/BigDocumentAutoCleanUpTest.java

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,6 @@ This file is part of the iText (R) project.
4040
For more information, please contact iText Software Corp. at this
4141
4242
*/
43-
/*
44-
* To change this license header, choose License Headers in Project Properties.
45-
* To change this template file, choose Tools | Templates
46-
* and open the template in the editor.
47-
*/
4843
package com.itextpdf.pdfcleanup;
4944

5045
import com.itextpdf.kernel.color.Color;
@@ -57,11 +52,6 @@ This file is part of the iText (R) project.
5752
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
5853
import com.itextpdf.kernel.pdf.canvas.parser.listener.RegexBasedLocationExtractionStrategy;
5954
import com.itextpdf.kernel.utils.CompareTool;
60-
import java.lang.reflect.Array;
61-
import java.lang.reflect.Constructor;
62-
import java.lang.reflect.Method;
63-
import com.itextpdf.pdfcleanup.PdfCleanupProductInfo;
64-
import com.itextpdf.kernel.Version;
6555
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
6656
import com.itextpdf.pdfcleanup.autosweep.ICleanupStrategy;
6757
import com.itextpdf.pdfcleanup.autosweep.PdfAutoSweep;
@@ -81,9 +71,6 @@ This file is part of the iText (R) project.
8171

8272
import static com.itextpdf.test.ITextTest.createOrClearDestinationFolder;
8373

84-
/**
85-
* @author Joris Schellekens
86-
*/
8774
@Category(IntegrationTest.class)
8875
public class BigDocumentAutoCleanUpTest {
8976

@@ -116,7 +103,7 @@ public void redactLipsum() throws IOException, InterruptedException {
116103
pdf.close();
117104

118105
// compare
119-
compareByContent(cmp, output, outputPath, "diff_redactLipsum_");
106+
compareResults(cmp, output, outputPath, "diff_redactLipsum_");
120107
}
121108

122109
@Test
@@ -142,7 +129,7 @@ public void redactTonySoprano() throws IOException, InterruptedException {
142129
pdf.close();
143130

144131
// compare
145-
compareByContent(cmp, output, outputPath, "diff_redactTonySoprano_");
132+
compareResults(cmp, output, outputPath, "diff_redactTonySoprano_");
146133
}
147134

148135
@Test
@@ -164,7 +151,7 @@ public void redactIPhoneUserManualMatchColor() throws IOException, InterruptedEx
164151
pdf.close();
165152

166153
// compare
167-
compareByContent(cmp, output, outputPath, "diff_redactIPhoneUserManualMatchColor_");
154+
compareResults(cmp, output, outputPath, "diff_redactIPhoneUserManualMatchColor_");
168155
}
169156

170157
@Test
@@ -187,10 +174,10 @@ public void redactIPhoneUserManual() throws IOException, InterruptedException {
187174
pdf.close();
188175

189176
// compare
190-
compareByContent(cmp, output, outputPath, "diff_redactIPhoneUserManual_");
177+
compareResults(cmp, output, outputPath, "diff_redactIPhoneUserManual_");
191178
}
192179

193-
private void compareByContent(String cmp, String output, String targetDir, String diffPrefix) throws IOException, InterruptedException {
180+
private void compareResults(String cmp, String output, String targetDir, String diffPrefix) throws IOException, InterruptedException {
194181
CompareTool cmpTool = new CompareTool();
195182
String errorMessage = cmpTool.compareVisually(output, cmp, targetDir, diffPrefix + "_");
196183

@@ -258,6 +245,7 @@ public Color getRedactionColor(IPdfTextLocation rect) {
258245
return colorByRectangle.containsKey(rect.getRectangle()) ? colorByRectangle.get(rect.getRectangle()) : Color.BLACK;
259246
}
260247

248+
@Override
261249
public ICleanupStrategy reset()
262250
{
263251
return new CustomLocationExtractionStrategy(regex);

0 commit comments

Comments
 (0)