Skip to content

Commit 524c8ed

Browse files
committed
[RELEASE] iText pdfSweep 4.0.3
2 parents 97729a1 + 0e953a5 commit 524c8ed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+777
-348
lines changed

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>root</artifactId>
8-
<version>8.0.3</version>
8+
<version>8.0.5</version>
99
<relativePath />
1010
</parent>
1111

1212
<artifactId>cleanup</artifactId>
13-
<version>4.0.2</version>
13+
<version>4.0.3</version>
1414

1515
<name>pdfSweep</name>
1616
<description>Redact PDF documents. If you have to share PDFs with different departments or send them out of house, but they

sharpenConfiguration.xml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
<file path="com/itextpdf/pdfcleanup/util/CleanUpCsCompareUtil.java"/>
88
<file path="com/itextpdf/pdfcleanup/CleanUpCsCompareUtilTest.java"/>
99
</fileset>
10+
<fileset reason="This class contains a test which checks results of a bug regarding image type.
11+
This has been fixed for specific JDKs but not all. Needs different implementation for .NET.">
12+
<file path="com/itextpdf/pdfcleanup/UnsupportedImageTypeTest.java"/>
13+
</fileset>
1014
</java>
1115
<resource>
1216
<file path="com/itextpdf/pdfcleanup/CleanUpTaggedPdfTest/cmp_cleanImage_partial.pdf" />
@@ -37,7 +41,7 @@
3741
<file path="com/itextpdf/pdfcleanup/FilteredImagesCacheTest/cmp_filteredImagesCacheTest04.pdf" />
3842
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCs.pdf" />
3943
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncoded.pdf" />
40-
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncodedWithApp14Segment.pdf" />
44+
<file path="com/itextpdf/pdfcleanup/images/CleanupImageWithColorSpaceTest/cmp_imgSeparationCsJpegBaselineEncodedWithApp14Segment.pdf" />
4145
</resource>
4246
</ignored>
4347
<overwritten>

src/main/java/com/itextpdf/pdfcleanup/CleanUpProperties.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfcleanup;
2424

2525
import com.itextpdf.commons.actions.contexts.IMetaInfo;
26+
import com.itextpdf.pdfcleanup.exceptions.CleanupExceptionMessageConstant;
2627

2728
/**
2829
* Contains properties for {@link PdfCleanUpTool} operations.
@@ -31,6 +32,7 @@ public class CleanUpProperties {
3132

3233
private IMetaInfo metaInfo;
3334
private boolean processAnnotations;
35+
private Double overlapRatio;
3436

3537
/**
3638
* Creates default CleanUpProperties instance.
@@ -76,4 +78,36 @@ public boolean isProcessAnnotations() {
7678
public void setProcessAnnotations(boolean processAnnotations) {
7779
this.processAnnotations = processAnnotations;
7880
}
81+
82+
/**
83+
* Gets the overlap ratio.
84+
* This is a value between 0 and 1 that indicates how much the content region should overlap with the redaction
85+
* area to be removed.
86+
*
87+
* @return the overlap ratio or {@code null} if it has not been set.
88+
*/
89+
public Double getOverlapRatio() {
90+
return overlapRatio;
91+
}
92+
93+
/**
94+
* Sets the overlap ratio.
95+
* This is a value between 0 and 1 that indicates how much the content region should overlap with the
96+
* redaction area to be removed.
97+
* <p>
98+
* Example: if the overlap ratio is set to 0.3, the content region will be removed if it overlaps with
99+
* the redaction area by at least 30%.
100+
*
101+
* @param overlapRatio the overlap ratio to set, in the range (0, 1]; {@code null} clears the value.
102+
*/
103+
public void setOverlapRatio(Double overlapRatio) {
104+
if (overlapRatio == null) {
105+
this.overlapRatio = null;
106+
return;
107+
}
108+
if (overlapRatio <= 0 || overlapRatio > 1) {
109+
throw new IllegalArgumentException(CleanupExceptionMessageConstant.OVERLAP_RATIO_SHOULD_BE_IN_RANGE);
110+
}
111+
this.overlapRatio = overlapRatio;
112+
}
79113
}

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpFilter.java

Lines changed: 53 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,13 @@ class PdfCleanUpFilter {
9595
private static final Set<PdfName> NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP = Collections.unmodifiableSet(
9696
new LinkedHashSet<>(Arrays.asList(PdfName.JBIG2Decode, PdfName.DCTDecode, PdfName.JPXDecode)));
9797

98-
private List<Rectangle> regions;
98+
private final List<Rectangle> regions;
9999

100-
public PdfCleanUpFilter(List<Rectangle> regions) {
100+
private final CleanUpProperties properties;
101+
102+
public PdfCleanUpFilter(List<Rectangle> regions, CleanUpProperties properties) {
101103
this.regions = regions;
104+
this.properties = properties;
102105
}
103106

104107
static boolean imageSupportsDirectCleanup(PdfImageXObject image) {
@@ -118,7 +121,7 @@ static boolean imageSupportsDirectCleanup(PdfImageXObject image) {
118121
* are never considered as intersecting.
119122
* @return true if the rectangles intersect, false otherwise
120123
*/
121-
static boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
124+
boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
122125
IClipper clipper = new DefaultClipper();
123126
// If the redaction area is degenerate, the result will be false
124127
if (!ClipperBridge.addPolygonToClipper(clipper, rect2, PolyType.CLIP)) {
@@ -170,29 +173,44 @@ static boolean checkIfRectanglesIntersect(Point[] rect1, Point[] rect2) {
170173
// working with paths is considered to be a bit faster in terms of performance.
171174
Paths paths = new Paths();
172175
clipper.execute(ClipType.INTERSECTION, paths, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
173-
return !checkIfIntersectionRectangleDegenerate(paths.getBounds(), false)
174-
&& !paths.isEmpty();
175-
} else {
176-
int rect1Size = rect1.length;
176+
return checkIfIntersectionOccurs(paths, rect1, false);
177+
}
178+
intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
179+
if (!intersectionSubjectAdded) {
180+
// According to the comment above,
181+
// this could have happened only if all four passed points are actually the same point.
182+
// Adding here a point really close to the original point, to make sure it's not covered by the
183+
// intersecting rectangle.
184+
final double SMALL_DIFF = 0.01;
185+
final Point[] expandedRect1 = new Point[rect1.length + 1];
186+
System.arraycopy(rect1, 0, expandedRect1, 0, rect1.length);
187+
expandedRect1[rect1.length] = new Point(rect1[0].getX() + SMALL_DIFF, rect1[0].getY());
188+
rect1 = expandedRect1;
189+
177190
intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
178-
if (!intersectionSubjectAdded) {
179-
// According to the comment above,
180-
// this could have happened only if all four passed points are actually the same point.
181-
// Adding here a point really close to the original point, to make sure it's not covered by the
182-
// intersecting rectangle.
183-
double smallDiff = 0.01;
184-
List<Point> rect1List = new ArrayList<Point>(Arrays.asList(rect1));
185-
rect1List.add(new Point(rect1[0].getX() + smallDiff, rect1[0].getY()));
186-
rect1 = rect1List.toArray(new Point[rect1Size]);
187-
intersectionSubjectAdded = ClipperBridge.addPolylineSubjectToClipper(clipper, rect1);
188-
assert intersectionSubjectAdded;
189-
}
190-
PolyTree polyTree = new PolyTree();
191-
clipper.execute(ClipType.INTERSECTION, polyTree, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
192-
Paths paths = Paths.makePolyTreeToPaths(polyTree);
193-
return !checkIfIntersectionRectangleDegenerate(paths.getBounds(), true)
194-
&& !paths.isEmpty();
191+
assert intersectionSubjectAdded;
192+
}
193+
PolyTree polyTree = new PolyTree();
194+
clipper.execute(ClipType.INTERSECTION, polyTree, PolyFillType.NON_ZERO, PolyFillType.NON_ZERO);
195+
return checkIfIntersectionOccurs(Paths.makePolyTreeToPaths(polyTree), rect1, true);
196+
}
197+
198+
private boolean checkIfIntersectionOccurs(Paths paths, Point[] rect1, boolean isDegenerate) {
199+
if (paths.isEmpty()) {
200+
return false;
195201
}
202+
final LongRect intersectionRectangle = paths.getBounds();
203+
// If the user defines a overlappingRatio we use this to calculate whether it intersects enough
204+
// To pass as an intersection
205+
if (properties.getOverlapRatio() == null) {
206+
return !checkIfIntersectionRectangleDegenerate(intersectionRectangle, isDegenerate);
207+
}
208+
final double overlappedArea = CleanUpHelperUtil.calculatePolygonArea(rect1);
209+
final double intersectionArea = ClipperBridge.longRectCalculateHeight(intersectionRectangle) *
210+
ClipperBridge.longRectCalculateWidth(intersectionRectangle);
211+
final double percentageOfOverlapping = intersectionArea / overlappedArea;
212+
final float SMALL_VALUE_FOR_ROUNDING_ERRORS = 1e-5f;
213+
return percentageOfOverlapping + SMALL_VALUE_FOR_ROUNDING_ERRORS > properties.getOverlapRatio();
196214
}
197215

198216
/**
@@ -274,7 +292,7 @@ FilteredImagesCache.FilteredImageKey createFilteredImageKey(PdfImageXObject imag
274292
* @return a filtered {@link com.itextpdf.kernel.geom.Path} object.
275293
*/
276294
private com.itextpdf.kernel.geom.Path filterFillPath(com.itextpdf.kernel.geom.Path path,
277-
Matrix ctm, int fillingRule) {
295+
Matrix ctm, int fillingRule) {
278296
path.closeAllSubpaths();
279297

280298
IClipper clipper = new DefaultClipper();
@@ -336,8 +354,8 @@ private List<Rectangle> getImageAreasToBeCleaned(Matrix imageCtm) {
336354
}
337355

338356
private com.itextpdf.kernel.geom.Path filterStrokePath(com.itextpdf.kernel.geom.Path sourcePath, Matrix ctm,
339-
float lineWidth, int lineCapStyle, int lineJoinStyle,
340-
float miterLimit, LineDashPattern lineDashPattern) {
357+
float lineWidth, int lineCapStyle, int lineJoinStyle,
358+
float miterLimit, LineDashPattern lineDashPattern) {
341359
com.itextpdf.kernel.geom.Path path = sourcePath;
342360
JoinType joinType = ClipperBridge.getJoinType(lineJoinStyle);
343361
EndType endType = ClipperBridge.getEndType(lineCapStyle);
@@ -420,15 +438,14 @@ private static FilterResult<ImageData> filterImage(PdfImageXObject image, List<R
420438
* is true) and it is included into intersecting rectangle, this method returns false,
421439
* even though the intersection rectangle is degenerate.
422440
*
423-
* @param rect intersection rectangle
441+
* @param rect intersection rectangle
424442
* @param isIntersectSubjectDegenerate value, specifying if the intersection subject
425443
* is degenerate.
426444
* @return true - if the intersection rectangle is degenerate.
427445
*/
428-
private static boolean checkIfIntersectionRectangleDegenerate(LongRect rect,
429-
boolean isIntersectSubjectDegenerate) {
430-
float width = (float)(Math.abs(rect.left - rect.right) / ClipperBridge.floatMultiplier);
431-
float height = (float)(Math.abs(rect.top - rect.bottom) / ClipperBridge.floatMultiplier);
446+
private static boolean checkIfIntersectionRectangleDegenerate(LongRect rect, boolean isIntersectSubjectDegenerate) {
447+
final float width = ClipperBridge.longRectCalculateWidth(rect);
448+
final float height = ClipperBridge.longRectCalculateHeight(rect);
432449
return isIntersectSubjectDegenerate ? (width < EPS && height < EPS) : (width < EPS || height < EPS);
433450
}
434451

@@ -466,7 +483,7 @@ private static boolean isSupportedFilterForDirectImageCleanup(PdfObject filter)
466483
return true;
467484
}
468485
if (filter.isName()) {
469-
return !NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP.contains((PdfName)filter);
486+
return !NOT_SUPPORTED_FILTERS_FOR_DIRECT_CLEANUP.contains((PdfName) filter);
470487
} else if (filter.isArray()) {
471488
PdfArray filterArray = (PdfArray) filter;
472489
for (int i = 0; i < filterArray.size(); ++i) {
@@ -508,7 +525,7 @@ private static Rectangle transformRectIntoImageCoordinates(Rectangle rect, Matri
508525
* Filters image content using direct manipulation over PDF image samples stream. Implemented according to ISO 32000-2,
509526
* "8.9.3 Sample representation".
510527
*
511-
* @param image image XObject which will be filtered
528+
* @param image image XObject which will be filtered
512529
* @param imageAreasToBeCleaned list of rectangle areas for clean up with coordinates in (0,1)x(0,1) space
513530
* @return raw bytes of the PDF image samples stream which is already cleaned.
514531
*/
@@ -529,7 +546,7 @@ private static byte[] processImageDirectly(PdfImageXObject image, List<Rectangle
529546
throw new IllegalArgumentException("/BitsPerComponent only allowed values are: 1, 2, 4, 8 and 16.");
530547
}
531548

532-
double bytesInComponent = (double)bpc / 8;
549+
double bytesInComponent = (double) bpc / 8;
533550
int firstComponentInByte = 0;
534551
if (bpc < 16) {
535552
for (int i = 0; i < bpc; ++i) {
@@ -544,7 +561,7 @@ private static byte[] processImageDirectly(PdfImageXObject image, List<Rectangle
544561
rowPadding = (int) (8 - (width * bpc) % 8);
545562
}
546563
for (Rectangle rect : imageAreasToBeCleaned) {
547-
int[] cleanImgRect = CleanUpHelperUtil.getImageRectToClean(rect, (int)width, (int)height);
564+
int[] cleanImgRect = CleanUpHelperUtil.getImageRectToClean(rect, (int) width, (int) height);
548565
for (int j = cleanImgRect[Y]; j < cleanImgRect[Y] + cleanImgRect[H]; ++j) {
549566
for (int i = cleanImgRect[X]; i < cleanImgRect[X] + cleanImgRect[W]; ++i) {
550567
// based on assumption that numOfComponents always equals 1, because this method is only for monochrome and grayscale images
@@ -751,7 +768,6 @@ private static Point[] transformPoints(Matrix transformationMatrix, boolean inve
751768
private static Point[] getTextRectangle(TextRenderInfo renderInfo) {
752769
LineSegment ascent = renderInfo.getAscentLine();
753770
LineSegment descent = renderInfo.getDescentLine();
754-
755771
return new Point[]{
756772
new Point(ascent.getStartPoint().get(0), ascent.getStartPoint().get(1)),
757773
new Point(ascent.getEndPoint().get(0), ascent.getEndPoint().get(1)),

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpProcessor.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,15 @@ public class PdfCleanUpProcessor extends PdfCanvasProcessor {
169169
private TextPositioning textPositioning;
170170
private FilteredImagesCache filteredImagesCache;
171171

172+
172173
// Convenience constructor: delegates to the three-argument constructor,
// passing a newly created CleanUpProperties instance.
PdfCleanUpProcessor(List<Rectangle> cleanUpRegions, PdfDocument document) {
174+
this(cleanUpRegions, document, new CleanUpProperties());
175+
}
176+
177+
PdfCleanUpProcessor(List<Rectangle> cleanUpRegions, PdfDocument document, CleanUpProperties properties) {
173178
super(new PdfCleanUpEventListener());
174179
this.document = document;
175-
this.filter = new PdfCleanUpFilter(cleanUpRegions);
180+
this.filter = new PdfCleanUpFilter(cleanUpRegions, properties);
176181
this.canvasStack = new Stack<>();
177182
this.notAppliedGsParams = new ArrayDeque<>();
178183
this.notAppliedGsParams.push(new NotAppliedGsParams());

src/main/java/com/itextpdf/pdfcleanup/PdfCleanUpTool.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public class PdfCleanUpTool {
9292

9393
private PdfDocument pdfDocument;
9494

95-
private boolean processAnnotations;
95+
private CleanUpProperties properties;
9696

9797
/**
9898
* Key - page number, value - list of locations related to the page.
@@ -137,14 +137,14 @@ public PdfCleanUpTool(PdfDocument pdfDocument, boolean cleanRedactAnnotations, C
137137
if (pdfDocument.getReader() == null || pdfDocument.getWriter() == null) {
138138
throw new PdfException(CleanupExceptionMessageConstant.PDF_DOCUMENT_MUST_BE_OPENED_IN_STAMPING_MODE);
139139
}
140+
this.properties = properties;
140141
this.pdfDocument = pdfDocument;
141142
this.pdfCleanUpLocations = new HashMap<>();
142143
this.filteredImagesCache = new FilteredImagesCache();
143144

144145
if (cleanRedactAnnotations) {
145146
addCleanUpLocationsBasedOnRedactAnnotations();
146147
}
147-
processAnnotations = properties.isProcessAnnotations();
148148
}
149149

150150
/**
@@ -215,10 +215,10 @@ private void cleanUpPage(int pageNumber, List<PdfCleanUpLocation> cleanUpLocatio
215215
}
216216

217217
PdfPage page = pdfDocument.getPage(pageNumber);
218-
PdfCleanUpProcessor cleanUpProcessor = new PdfCleanUpProcessor(regions, pdfDocument);
218+
PdfCleanUpProcessor cleanUpProcessor = new PdfCleanUpProcessor(regions, pdfDocument, this.properties);
219219
cleanUpProcessor.setFilteredImagesCache(filteredImagesCache);
220220
cleanUpProcessor.processPageContent(page);
221-
if (processAnnotations) {
221+
if (properties.isProcessAnnotations()) {
222222
cleanUpProcessor.processPageAnnotations(page, regions, redactAnnotations != null);
223223
}
224224

src/main/java/com/itextpdf/pdfcleanup/actions/data/PdfSweepProductData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public class PdfSweepProductData {
3131
public static final String PDF_SWEEP_PRODUCT_NAME = "pdfSweep";
3232
public static final String PDF_SWEEP_PUBLIC_PRODUCT_NAME = PDF_SWEEP_PRODUCT_NAME;
3333

34-
private static final String PDF_SWEEP_VERSION = "4.0.2";
34+
private static final String PDF_SWEEP_VERSION = "4.0.3";
3535
private static final int PDF_SWEEP_COPYRIGHT_SINCE = 2000;
3636
private static final int PDF_SWEEP_COPYRIGHT_TO = 2024;
3737

src/main/java/com/itextpdf/pdfcleanup/exceptions/CleanupExceptionMessageConstant.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ public final class CleanupExceptionMessageConstant {
3434
// Do not remove, it's used in .NET
3535
// This same exception message is thrown in CleanUpImageUtil#cleanImage when the image format is unsupported
3636
public static final String UNSUPPORTED_IMAGE_TYPE = "Unsupported image type";
37+
public static final String OVERLAP_RATIO_SHOULD_BE_IN_RANGE = "Overlap ratio should be in range (0, 1]";
3738

38-
private CleanupExceptionMessageConstant(){}
39+
private CleanupExceptionMessageConstant(){
40+
//empty constructor
41+
}
3942
}

src/main/java/com/itextpdf/pdfcleanup/util/CleanUpHelperUtil.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ This file is part of the iText (R) project.
2222
*/
2323
package com.itextpdf.pdfcleanup.util;
2424

25+
import com.itextpdf.kernel.geom.Point;
2526
import com.itextpdf.kernel.geom.Rectangle;
2627

2728
/**
@@ -56,4 +57,19 @@ public static int[] getImageRectToClean(Rectangle rect, int imgWidth, int imgHei
5657
int h = scaledTopY - scaledBottomY;
5758
return new int[]{x, y, w, h};
5859
}
60+
61+
62+
public static double calculatePolygonArea(Point[] vertices) {
63+
double sum = 0;
64+
for (int i = 0; i < vertices.length; i++) {
65+
if (i == 0) {
66+
sum += vertices[i].x * (vertices[i + 1].y - vertices[vertices.length - 1].y);
67+
} else if (i == vertices.length - 1) {
68+
sum += vertices[i].x * (vertices[0].y - vertices[i - 1].y);
69+
} else {
70+
sum += vertices[i].x * (vertices[i + 1].y - vertices[i - 1].y);
71+
}
72+
}
73+
return 0.5 * Math.abs(sum);
74+
}
5975
}

0 commit comments

Comments
 (0)