Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6e13b38

Browse files
committed Apr 16, 2018
[RELEASE] iText 7 pdfSweep - 2.0.1
https://github.com/itext/i7j-pdfsweep/releases/tag/2.0.1
* release/2.0.1:
  - [RELEASE] 2.0.1-SNAPSHOT -> 2.0.1
  - Introduce reusing of already filtered images
  - Bump iText Core dependency version
  - Fixed an issue where .getFontFromAcroForm() method was called on a pdf without an AcroForm, resulting in a NullPointerException. This is fixed by adding an extra check on the availability of the AcroForm before querying it. Also changed .drawOverlayText() so instead of throwing a NullPointerException it throws a PdfException.
  - Update copyright header
  - Remove empty @param and @return Javadoc tags
  - [RELEASE] Update pom for 2.0.1-SNAPSHOT development
  - Avoid unnecessary TJ operator before writing text with ` or ", handle empty TJ array
  - Process new line operators while text cleaning.
  - Minor changes.
2 parents 7973d2c + bd2af16 commit 6e13b38

36 files changed

+581
-113
lines changed
 

‎pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>root</artifactId>
8-
<version>7.1.0</version>
8+
<version>7.1.2</version>
99
<relativePath />
1010
</parent>
1111

1212
<artifactId>cleanup</artifactId>
13-
<version>2.0.0</version>
13+
<version>2.0.1</version>
1414

1515
<name>pdfSweep</name>
1616
<url>http://itextpdf.com/</url>
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package com.itextpdf.pdfcleanup;
2+
3+
import com.itextpdf.kernel.geom.Rectangle;
4+
import com.itextpdf.kernel.pdf.PdfDocument;
5+
import com.itextpdf.kernel.pdf.PdfIndirectReference;
6+
import com.itextpdf.kernel.pdf.PdfStream;
7+
import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
8+
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
9+
import java.util.ArrayList;
10+
import java.util.Collections;
11+
import java.util.Comparator;
12+
import java.util.HashMap;
13+
import java.util.LinkedHashSet;
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.Set;
17+
18+
class FilteredImagesCache {
19+
private Map<PdfIndirectReference, List<FilteredImageKey>> cache = new HashMap<>();
20+
21+
static FilteredImageKey createFilteredImageKey(ImageRenderInfo image, List<Rectangle> areasToBeCleaned, PdfDocument document) {
22+
PdfStream imagePdfObject = image.getImage().getPdfObject();
23+
if (imagePdfObject.getIndirectReference() == null) {
24+
imagePdfObject.makeIndirect(document);
25+
}
26+
return new FilteredImageKey(image, areasToBeCleaned);
27+
}
28+
29+
/**
30+
* Retrieves saved result of image filtering based on given set of cleaning areas.
31+
* This won't handle the case when same filtering result is produced by different sets of areas,
32+
* e.g. if one set is { (0, 0, 50, 100), (50, 0, 50, 100)} and another one is {(0, 0, 100, 100)},
33+
* even though filtering results are essentially the same, current {@link FilteredImagesCache}
34+
* will treat this two cases as different filtering results.
35+
* @param imageKey the defining filtering case
36+
* @return result of image filtering based on given set of cleaning areas if such was already processed and saved,
37+
* null otherwise.
38+
*/
39+
PdfImageXObject get(FilteredImageKey imageKey) {
40+
List<FilteredImageKey> cachedFilteredImageKeys = cache.get(imageKey.getImageIndRef());
41+
if (cachedFilteredImageKeys != null) {
42+
for (FilteredImageKey cacheKey : cachedFilteredImageKeys) {
43+
if (rectanglesEqualWithEps(cacheKey.getCleanedAreas(), imageKey.getCleanedAreas())) {
44+
return cacheKey.getFilteredImage();
45+
}
46+
}
47+
}
48+
return null;
49+
}
50+
51+
void put(FilteredImageKey imageKey, PdfImageXObject filteredImage) {
52+
if (imageKey.getCleanedAreas() == null || imageKey.getCleanedAreas().isEmpty()) {
53+
return;
54+
}
55+
List<FilteredImageKey> filteredImageKeys = cache.get(imageKey.getImageIndRef());
56+
if (filteredImageKeys == null) {
57+
cache.put(imageKey.getImageIndRef(), filteredImageKeys = new ArrayList<>());
58+
}
59+
filteredImageKeys.add(imageKey);
60+
imageKey.setFilteredImage(filteredImage);
61+
}
62+
63+
private boolean rectanglesEqualWithEps(List<Rectangle> cacheRects, List<Rectangle> keyRects) {
64+
if (keyRects == null || cacheRects.size() != keyRects.size()) {
65+
return false;
66+
}
67+
68+
Set<Rectangle> cacheRectsSet = new LinkedHashSet<>(cacheRects);
69+
for (Rectangle keyArea : keyRects) {
70+
boolean found = false;
71+
for (Rectangle cacheArea : cacheRectsSet) {
72+
if (keyArea.equalsWithEpsilon(cacheArea)) {
73+
found = true;
74+
cacheRectsSet.remove(cacheArea);
75+
break;
76+
}
77+
}
78+
if (!found) {
79+
break;
80+
}
81+
}
82+
return cacheRectsSet.isEmpty();
83+
}
84+
85+
static class FilteredImageKey {
86+
private ImageRenderInfo image;
87+
private List<Rectangle> cleanedAreas;
88+
private PdfImageXObject filteredImage;
89+
90+
FilteredImageKey(ImageRenderInfo imageInfo, List<Rectangle> cleanedAreas) {
91+
this.image = imageInfo;
92+
this.cleanedAreas = cleanedAreas;
93+
}
94+
95+
List<Rectangle> getCleanedAreas() {
96+
return cleanedAreas;
97+
}
98+
99+
ImageRenderInfo getImageRenderInfo() {
100+
return image;
101+
}
102+
103+
PdfIndirectReference getImageIndRef() {
104+
return image.getImage().getPdfObject().getIndirectReference();
105+
}
106+
107+
PdfImageXObject getFilteredImage() {
108+
return filteredImage;
109+
}
110+
111+
void setFilteredImage(PdfImageXObject filteredImage) {
112+
this.filteredImage = filteredImage;
113+
}
114+
}
115+
}

‎src/main/java/com/itextpdf/pdfcleanup/LineDashPattern.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,6 @@ private static Point getUnitVector(Point vector) {
326326
* This is the Euclidean distance between the tip of the vector and the origin.
327327
*
328328
* @param vector input vector
329-
* @return
330329
*/
331330
private static double getVectorEuclideanNorm(Point vector) {
332331
return vector.distance(0, 0);
@@ -355,7 +354,6 @@ private static float applyDash(Path dashedPath, Point segStart, Point segEnd, Po
355354
* @param segStart start of the line segment
356355
* @param segEnd end of the line segment
357356
* @param point query point
358-
* @return
359357
*/
360358
private static boolean liesOnSegment(Point segStart, Point segEnd, Point point) {
361359
return point.getX() >= Math.min(segStart.getX(), segEnd.getX()) &&

‎src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpEventListener.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ public void eventOccurred(IEventData data, EventType type) {
8181
* @return the TextRenderInfo objects that were encountered when processing the last text rendering operation
8282
*/
8383
List<TextRenderInfo> getEncounteredText() {
84+
if (content.size() == 0) {
85+
throw new PdfException(textDataExpected);
86+
}
8487
ArrayList<TextRenderInfo> text = new ArrayList<>(content.size());
8588
for (IEventData data : content) {
8689
if (data instanceof TextRenderInfo) {

‎src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpFilter.java

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ This file is part of the iText (R) project.
5454
import com.itextpdf.kernel.geom.Rectangle;
5555
import com.itextpdf.kernel.geom.Subpath;
5656
import com.itextpdf.kernel.pdf.PdfArray;
57+
import com.itextpdf.kernel.pdf.PdfDocument;
58+
import com.itextpdf.kernel.pdf.PdfIndirectReference;
5759
import com.itextpdf.kernel.pdf.PdfNumber;
60+
import com.itextpdf.kernel.pdf.PdfStream;
5861
import com.itextpdf.kernel.pdf.PdfTextArray;
5962
import com.itextpdf.kernel.pdf.canvas.PdfCanvasConstants;
6063
import com.itextpdf.kernel.pdf.canvas.parser.clipper.ClipperBridge;
@@ -71,6 +74,7 @@ This file is part of the iText (R) project.
7174
import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
7275
import com.itextpdf.kernel.pdf.canvas.parser.data.PathRenderInfo;
7376
import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
77+
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
7478
import org.apache.commons.imaging.ImageFormats;
7579
import org.apache.commons.imaging.ImageInfo;
7680
import org.apache.commons.imaging.ImageReadException;
@@ -120,8 +124,6 @@ public PdfCleanUpFilter(List<Rectangle> regions) {
120124

121125
/**
122126
* Generic class representing the result of filtering an object of type T
123-
*
124-
* @param <T>
125127
*/
126128
static class FilterResult<T> {
127129
private boolean isModified;
@@ -143,8 +145,6 @@ boolean isModified() {
143145

144146
/**
145147
* Get the result after filtering
146-
*
147-
* @return
148148
*/
149149
T getFilterResult() {
150150
return filterResult;
@@ -155,7 +155,6 @@ T getFilterResult() {
155155
* Filter a TextRenderInfo object
156156
*
157157
* @param text the TextRenderInfo to be filtered
158-
* @return
159158
*/
160159
FilterResult<PdfArray> filterText(TextRenderInfo text) {
161160
PdfTextArray textArray = new PdfTextArray();
@@ -177,24 +176,34 @@ FilterResult<PdfArray> filterText(TextRenderInfo text) {
177176
return new FilterResult<PdfArray>(true, textArray);
178177
}
179178

179+
FilteredImagesCache.FilteredImageKey createFilteredImageKey(ImageRenderInfo image, PdfDocument document) {
180+
return FilteredImagesCache.createFilteredImageKey(image, getImageAreasToBeCleaned(image), document);
181+
}
182+
180183
/**
181184
* Filter an ImageRenderInfo object
182185
*
183186
* @param image the ImageRenderInfo object to be filtered
184-
* @return
185187
*/
186188
FilterResult<ImageData> filterImage(ImageRenderInfo image) {
187-
List<Rectangle> areasToBeCleaned = getImageAreasToBeCleaned(image);
188-
if (areasToBeCleaned == null) {
189+
return filterImage(image, getImageAreasToBeCleaned(image));
190+
}
191+
192+
FilterResult<ImageData> filterImage(FilteredImagesCache.FilteredImageKey imageKey) {
193+
return filterImage(imageKey.getImageRenderInfo(), imageKey.getCleanedAreas());
194+
}
195+
196+
FilterResult<ImageData> filterImage(ImageRenderInfo image, List<Rectangle> imageAreasToBeCleaned) {
197+
if (imageAreasToBeCleaned == null) {
189198
return new FilterResult<>(true, null);
190-
} else if (areasToBeCleaned.isEmpty()) {
191-
return new FilterResult<>(false, ImageDataFactory.create(image.getImage().getImageBytes()));
199+
} else if (imageAreasToBeCleaned.isEmpty()) {
200+
return new FilterResult<>(false, null);
192201
}
193202

194203
byte[] filteredImageBytes;
195204
try {
196205
byte[] originalImageBytes = image.getImage().getImageBytes();
197-
filteredImageBytes = processImage(originalImageBytes, areasToBeCleaned);
206+
filteredImageBytes = processImage(originalImageBytes, imageAreasToBeCleaned);
198207
} catch (Exception e) {
199208
throw new RuntimeException(e);
200209
}
@@ -206,7 +215,6 @@ FilterResult<ImageData> filterImage(ImageRenderInfo image) {
206215
* Filter a PathRenderInfo object
207216
*
208217
* @param path the PathRenderInfo object to be filtered
209-
* @return
210218
*/
211219
com.itextpdf.kernel.geom.Path filterStrokePath(PathRenderInfo path) {
212220
PdfArray dashPattern = path.getLineDashPattern();
@@ -220,7 +228,6 @@ com.itextpdf.kernel.geom.Path filterStrokePath(PathRenderInfo path) {
220228
* Filter a PathRenderInfo object
221229
*
222230
* @param path the PathRenderInfo object to be filtered
223-
* @return
224231
*/
225232
com.itextpdf.kernel.geom.Path filterFillPath(PathRenderInfo path, int fillingRule) {
226233
return filterFillPath(path.getPath(), path.getCtm(), fillingRule);
@@ -230,7 +237,6 @@ com.itextpdf.kernel.geom.Path filterFillPath(PathRenderInfo path, int fillingRul
230237
* Returns whether the given TextRenderInfo object needs to be cleaned up
231238
*
232239
* @param renderInfo the input TextRenderInfo object
233-
* @return
234240
*/
235241
private boolean isTextNotToBeCleaned(TextRenderInfo renderInfo) {
236242
Point[] textRect = getTextRectangle(renderInfo);
@@ -402,7 +408,6 @@ private void cleanImage(BufferedImage image, List<Rectangle> areasToBeCleaned) {
402408
* Get the bytes of the BufferedImage (in JPG format)
403409
*
404410
* @param image input image
405-
* @return
406411
*/
407412
private byte[] getJPGBytes(BufferedImage image) {
408413
ByteArrayOutputStream outputStream = null;
@@ -465,10 +470,7 @@ private com.itextpdf.kernel.geom.Path filterStrokePath(com.itextpdf.kernel.geom.
465470
/**
466471
* Note: this method will close all unclosed subpaths of the passed path.
467472
*
468-
* @param path path
469-
* @param ctm ctm
470473
* @param fillingRule If the subpath is contour, pass any value.
471-
* @return filterFillPath
472474
*/
473475
private com.itextpdf.kernel.geom.Path filterFillPath(com.itextpdf.kernel.geom.Path path, Matrix ctm, int fillingRule) {
474476
path.closeAllSubpaths();
@@ -570,7 +572,6 @@ private static List<Subpath> convertToSquares(List<Subpath> degenerateSubpaths,
570572
* Approximates a given Path with a List of Point objects
571573
*
572574
* @param path input path
573-
* @return
574575
*/
575576
private static List<Point> getPathApproximation(com.itextpdf.kernel.geom.Path path) {
576577
List<Point> approx = new ArrayList<Point>() {
@@ -634,7 +635,6 @@ private static Point[] getRotatedSquareVertices(Point[] orthogonalSquareVertices
634635
*
635636
* @param center center of the circle
636637
* @param radius radius of the circle
637-
* @return
638638
*/
639639
private static BezierCurve[] approximateCircle(Point center, double radius) {
640640
// The circle is split into 4 sectors. Arc of each sector
@@ -693,7 +693,6 @@ private Point[] transformPoints(Matrix transformationMatrix, boolean inverse, Po
693693
* Get the bounding box of a TextRenderInfo object
694694
*
695695
* @param renderInfo input TextRenderInfo object
696-
* @return
697696
*/
698697
private Point[] getTextRectangle(TextRenderInfo renderInfo) {
699698
LineSegment ascent = renderInfo.getAscentLine();
@@ -711,7 +710,6 @@ private Point[] getTextRectangle(TextRenderInfo renderInfo) {
711710
* Convert a Rectangle object into 4 Points
712711
*
713712
* @param rect input Rectangle
714-
* @return
715713
*/
716714
private Point[] getRectangleVertices(Rectangle rect) {
717715
Point[] points = {
@@ -731,7 +729,6 @@ private Point[] getRectangleVertices(Rectangle rect) {
731729
* @param p2 second Point
732730
* @param p3 third Point
733731
* @param p4 fourth Point
734-
* @return
735732
*/
736733
private Rectangle getAsRectangle(Point p1, Point p2, Point p3, Point p4) {
737734
List<Double> xs = Arrays.asList(p1.getX(), p2.getX(), p3.getX(), p4.getX());
@@ -750,7 +747,6 @@ private Rectangle getAsRectangle(Point p1, Point p2, Point p3, Point p4) {
750747
*
751748
* @param rect1 first Rectangle
752749
* @param rect2 second Rectangle
753-
* @return
754750
*/
755751
private Rectangle getRectanglesIntersection(Rectangle rect1, Rectangle rect2) {
756752
float x1 = Math.max(rect1.getLeft(), rect2.getLeft());

0 commit comments

Comments
 (0)
Please sign in to comment.