Skip to content

Commit eb25fb0

Browse files
committed
[RELEASE] iText pdfOCR 4.0.0
2 parents 78c4b90 + 55a45d5 commit eb25fb0

File tree

70 files changed

+1008
-1013
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+1008
-1013
lines changed

SECURITY.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# iText Security Policy
2+
3+
## Reporting a Vulnerability
4+
5+
We are committed to maintaining the security of our software. If you discover a security vulnerability, we encourage you to report it to us as soon as possible.
6+
7+
To report a vulnerability, please visit our [Vulnerability Reporting Page](https://itextpdf.com/report-vulnerability), or email [[email protected]]([email protected]). If you do not receive a response within 2 business days, please follow up, as we may not have received your message.
8+
9+
We follow the procedure of Coordinated Vulnerability Disclosure (CVD) and, to protect the ecosystem, we request that those reporting do the same. Please visit the above page for more information, and follow the steps below to ensure that your report is handled promptly and appropriately:
10+
11+
1. **Do not disclose the vulnerability publicly** until we have had a chance to address it.
12+
2. **Provide a detailed description** of the vulnerability, including steps to reproduce it, if possible.
13+
3. **Include any relevant information** such as the version of pdfOCR you are using, your operating system, and any other pertinent details.
14+
15+
## Security Updates and Patches
16+
17+
When a vulnerability is reported, we will:
18+
19+
1. **Investigate and verify** the vulnerability.
20+
2. **Develop and test** a fix for the vulnerability.
21+
3. **Release a patch** as soon as possible.
22+
23+
24+
## Known Vulnerabilities
25+
26+
The iText Knowledge Base has a page for known [Common Vulnerabilities and Exposures](https://kb.itextpdf.com/itext/cves) (CVEs). Please check it to ensure your vulnerability has not already been disclosed or addressed.
27+
28+
## Supported product lines
29+
30+
See the [Compatibility Matrix](https://kb.itextpdf.com/itext/compatibility-matrix).
31+
32+
## Security Best Practices
33+
34+
To help ensure the security of your applications using pdfOCR, we recommend the following best practices:
35+
36+
1. **Keep pdfOCR up to date** by regularly checking for and applying updates.
37+
2. **Review and follow** our security guidelines for secure usage.
38+
3. **Monitor your applications** for any unusual activity and investigate any anomalies promptly.
39+
40+
Thank you for helping us keep iText secure!

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>3.0.2</version>
8+
<version>4.0.0</version>
99
</parent>
1010

1111
<artifactId>pdfocr-api</artifactId>

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,11 @@ public interface IOcrEngine {
8585
* @param ocrProcessContext ocr processing context
8686
*/
8787
void createTxtFile(List<File> inputImages, File txtFile, OcrProcessContext ocrProcessContext);
88+
89+
/**
90+
* Checks whether tagging is supported by the OCR engine.
91+
*
92+
* @return {@code true} if tagging is supported by the engine, {@code false} otherwise
93+
*/
94+
boolean isTaggingSupported();
8895
}

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ This file is part of the iText (R) project.
3939
import com.itextpdf.kernel.geom.Point;
4040
import com.itextpdf.kernel.geom.Rectangle;
4141
import com.itextpdf.kernel.pdf.DocumentProperties;
42-
import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
42+
import com.itextpdf.kernel.pdf.PdfAConformance;
4343
import com.itextpdf.kernel.pdf.PdfDocument;
4444
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
4545
import com.itextpdf.kernel.pdf.PdfName;
@@ -67,8 +67,8 @@ This file is part of the iText (R) project.
6767
import com.itextpdf.pdfocr.logs.PdfOcrLogMessageConstant;
6868
import com.itextpdf.pdfocr.statistics.PdfOcrOutputType;
6969
import com.itextpdf.pdfocr.statistics.PdfOcrOutputTypeStatisticsEvent;
70-
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
7170
import com.itextpdf.pdfocr.structuretree.ArtifactItem;
71+
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
7272

7373
import java.io.File;
7474
import java.io.IOException;
@@ -129,6 +129,9 @@ public OcrPdfCreator(final IOcrEngine ocrEngine) {
129129
*/
130130
public OcrPdfCreator(final IOcrEngine ocrEngine,
131131
final OcrPdfCreatorProperties ocrPdfCreatorProperties) {
132+
if (ocrPdfCreatorProperties.isTagged() && !ocrEngine.isTaggingSupported()) {
133+
throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
134+
}
132135
setOcrEngine(ocrEngine);
133136
setOcrPdfCreatorProperties(ocrPdfCreatorProperties);
134137
}
@@ -471,11 +474,7 @@ private void addToCanvas(final PdfDocument pdfDocument,
471474
// Logical tree, a list of top items, children can be retrieved out of them
472475
List<LogicalStructureTreeItem> logicalTree = new ArrayList<>();
473476
// A map of leaf LogicalStructureTreeItem's to TextInfo's attached to these leaves
474-
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = new HashMap<>();
475-
final boolean taggedSupported = getLogicalTree(pageText, logicalTree, leavesTextInfos);
476-
if (!taggedSupported) {
477-
throw new PdfOcrException(PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
478-
}
477+
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = getLogicalTree(pageText, logicalTree);
479478
pdfDocument.setTagged();
480479

481480
// Create a map of TextInfo to tag pointers meanwhile creating the required tags.
@@ -504,7 +503,7 @@ private PdfDocument createPdfDocument(final PdfWriter pdfWriter,
504503
boolean createPdfA3u = pdfOutputIntent != null;
505504
if (createPdfA3u) {
506505
pdfDocument = new PdfADocument(pdfWriter,
507-
PdfAConformanceLevel.PDF_A_3U, pdfOutputIntent,
506+
PdfAConformance.PDF_A_3U, pdfOutputIntent,
508507
documentProperties);
509508
} else {
510509
pdfDocument = new PdfDocument(pdfWriter,
@@ -623,7 +622,7 @@ private void addImageToCanvas(final ImageData imageData,
623622
ocrPdfCreatorProperties.getPageSize(), imageSize);
624623
final Rectangle rect =
625624
new Rectangle(
626-
(float)coordinates.x, (float)coordinates.y,
625+
(float)coordinates.getX(), (float)coordinates.getY(),
627626
imageSize.getWidth(), imageSize.getHeight());
628627
pdfCanvas.addImageFittedIntoRectangle(imageData, rect, false);
629628
}
@@ -634,19 +633,12 @@ private void addImageToCanvas(final ImageData imageData,
634633
}
635634
}
636635

637-
/**
638-
* @return {@code true} if tagging supported by the engine.
639-
* @deprecated In next major version we need to add boolean taggingSupported() method into IOcrEngine
640-
* and throw exception in OcrPdfCreator constructor if taggingSupported() returns false but
641-
* OcrPdfCreatorProperties.getTagged returns true.
642-
*/
643-
@Deprecated
644-
private static boolean getLogicalTree(List<TextInfo> textInfos,
645-
List<LogicalStructureTreeItem> logicalStructureTreeItems,
646-
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos) {
647-
boolean taggedSupported = false;
636+
private static Map<LogicalStructureTreeItem, List<TextInfo>> getLogicalTree(
637+
List<TextInfo> textInfos, List<LogicalStructureTreeItem> logicalStructureTreeItems) {
638+
639+
Map<LogicalStructureTreeItem, List<TextInfo>> leavesTextInfos = new HashMap<>();
648640
if (textInfos == null) {
649-
return taggedSupported;
641+
return leavesTextInfos;
650642
}
651643

652644
for (TextInfo textInfo : textInfos) {
@@ -656,7 +648,6 @@ private static boolean getLogicalTree(List<TextInfo> textInfos,
656648
continue;
657649
} else if (structTreeItem != null) {
658650
topParent = getTopParent(structTreeItem);
659-
taggedSupported = true;
660651
} else {
661652
structTreeItem = new LogicalStructureTreeItem();
662653
textInfo.setLogicalStructureTreeItem(structTreeItem);
@@ -675,7 +666,7 @@ private static boolean getLogicalTree(List<TextInfo> textInfos,
675666
}
676667
}
677668

678-
return taggedSupported;
669+
return leavesTextInfos;
679670
}
680671

681672
private static LogicalStructureTreeItem getTopParent(LogicalStructureTreeItem structInfo) {
@@ -792,8 +783,8 @@ private void addTextToCanvas(
792783
}
793784

794785
canvas.showTextAligned(paragraph,
795-
xOffset + (float) imageCoordinates.x,
796-
yOffset + (float) imageCoordinates.y,
786+
xOffset + (float) imageCoordinates.getX(),
787+
yOffset + (float) imageCoordinates.getY(),
797788
TextAlignment.LEFT);
798789

799790
if (ocrPdfCreatorProperties.isTagged()) {
@@ -933,7 +924,7 @@ public PdfCanvas showText(GlyphLine text) {
933924
// unicode of the not found glyph
934925
String message = PdfOcrLogMessageConstant
935926
.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER;
936-
for (int i = glyphLine.start; i < glyphLine.end; i++) {
927+
for (int i = glyphLine.getStart(); i < glyphLine.getEnd(); i++) {
937928
if (isNotDefGlyph(currentFont, glyphLine.get(i))) {
938929
notDefGlyphsExists = true;
939930
message = MessageFormatUtil.format(PdfOcrLogMessageConstant

0 commit comments

Comments
 (0)