Skip to content

Commit 78c4b90

Browse files
committed
[RELEASE] iText pdfOCR 3.0.2
2 parents da196c6 + 151cd11 commit 78c4b90

File tree

136 files changed

+1204
-183
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

136 files changed

+1204
-183
lines changed

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>3.0.1</version>
8+
<version>3.0.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-api</artifactId>

pdfocr-api/src/main/java/com/itextpdf/pdfocr/AbstractPdfOcrEventHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IImageRotationHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2024 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr;
24+
25+
/**
26+
* OCR properties passed to the OCR engine as part of {@link OcrProcessContext}.
27+
*/
28+
public interface IOcrProcessProperties {
29+
}

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IProductAware.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrEngineProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java

Lines changed: 251 additions & 60 deletions
Large diffs are not rendered by default.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorEventHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorProperties.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -99,6 +99,11 @@ public class OcrPdfCreatorProperties {
9999

100100
private IMetaInfo metaInfo;
101101

102+
/**
103+
* Indicates whether the created pdf is tagged or not.
104+
*/
105+
private boolean tagged = false;
106+
102107
/**
103108
* Creates a new {@link OcrPdfCreatorProperties} instance.
104109
*/
@@ -369,6 +374,26 @@ public OcrPdfCreatorProperties setImageRotationHandler(
369374
return this;
370375
}
371376

377+
/**
378+
* Defines whether the pdf document should be tagged or not.
379+
*
380+
* @param tagged {@code true} if the result pdf is expected to be tagged, {@code false} otherwise.
381+
* @return this {@link OcrPdfCreatorProperties} instance.
382+
*/
383+
public OcrPdfCreatorProperties setTagged(boolean tagged) {
384+
this.tagged = tagged;
385+
return this;
386+
}
387+
388+
/**
389+
* Retrieves information on whether the pdf document should be tagged or not.
390+
*
391+
* @return {@code true} if the result pdf is expected to be tagged, {@code false} otherwise.
392+
*/
393+
public boolean isTagged() {
394+
return tagged;
395+
}
396+
372397
/**
373398
* Set meta info for this {@link OcrPdfCreatorProperties}.
374399
*

pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrProcessContext.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -28,6 +28,8 @@ This file is part of the iText (R) project.
2828
public class OcrProcessContext {
2929
private AbstractPdfOcrEventHelper ocrEventHelper;
3030

31+
private IOcrProcessProperties ocrProcessProperties;
32+
3133
/**
3234
* Creates an instance of ocr process context
3335
*
@@ -54,4 +56,22 @@ public AbstractPdfOcrEventHelper getOcrEventHelper() {
5456
public void setOcrEventHelper(AbstractPdfOcrEventHelper eventHelper) {
5557
this.ocrEventHelper = eventHelper;
5658
}
59+
60+
/**
61+
* Set extra OCR process properties.
62+
*
63+
* @param ocrProcessProperties extra OCR process properties.
64+
*/
65+
void setOcrProcessProperties(IOcrProcessProperties ocrProcessProperties) {
66+
this.ocrProcessProperties = ocrProcessProperties;
67+
}
68+
69+
/**
70+
* Get extra OCR process properties.
71+
*
72+
* @return extra OCR process properties.
73+
*/
74+
public IOcrProcessProperties getOcrProcessProperties() {
75+
return ocrProcessProperties;
76+
}
5777
}

pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfCreatorUtil.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -96,6 +96,7 @@ static float calculateFontSize(final Document document, final String line,
9696

9797
try {
9898
Paragraph paragraph = new Paragraph(line);
99+
paragraph.setMargin(0);
99100
paragraph.setWidth(bbox.getWidth());
100101
paragraph.setFontFamily(fontFamily);
101102

pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrFontProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrMetaInfoContainer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/ScaleMode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -23,11 +23,7 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfocr;
2424

2525
import com.itextpdf.kernel.geom.Rectangle;
26-
27-
import java.util.ArrayList;
28-
import java.util.Collections;
29-
import java.util.List;
30-
26+
import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;
3127

3228
/**
3329
* This class describes how recognized text is positioned on the image
@@ -45,6 +41,11 @@ public class TextInfo {
4541
*/
4642
private Rectangle bboxRect;
4743

44+
/**
45+
* If LogicalStructureTreeItem is set, then {@link TextInfo}s are expected to be in logical order.
46+
*/
47+
private LogicalStructureTreeItem logicalStructureTreeItem;
48+
4849
/**
4950
* Creates a new {@link TextInfo} instance.
5051
*/
@@ -107,4 +108,27 @@ public Rectangle getBboxRect() {
107108
public void setBboxRect(final Rectangle bbox) {
108109
this.bboxRect = new Rectangle(bbox);
109110
}
111+
112+
/**
113+
* Retrieves the structure tree item for the text item.
114+
*
115+
* @return structure tree item.
116+
*/
117+
public LogicalStructureTreeItem getLogicalStructureTreeItem() {
118+
return logicalStructureTreeItem;
119+
}
120+
121+
/**
122+
* Sets the logical structure tree parent item for the text info. It allows organizing text chunks
123+
* into a logical hierarchy, e.g. specify document paragraphs, tables, etc.
124+
* <p>
125+
*
126+
* If LogicalStructureTreeItem is set, then the list of {@link TextInfo}s in {@link IOcrEngine#doImageOcr}
127+
* return value is expected to be in logical order.
128+
*
129+
* @param logicalStructureTreeItem structure tree item.
130+
*/
131+
public void setLogicalStructureTreeItem(LogicalStructureTreeItem logicalStructureTreeItem) {
132+
this.logicalStructureTreeItem = logicalStructureTreeItem;
133+
}
110134
}

pdfocr-api/src/main/java/com/itextpdf/pdfocr/exceptions/PdfOcrException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/exceptions/PdfOcrExceptionMessageConstant.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -33,6 +33,7 @@ public class PdfOcrExceptionMessageConstant {
3333
public static final String CANNOT_CREATE_PDF_DOCUMENT = "Cannot create PDF document: {0}";
3434
public static final String STATISTICS_EVENT_TYPE_CANT_BE_NULL = "Statistics event type can't be null";
3535
public static final String STATISTICS_EVENT_TYPE_IS_NOT_DETECTED = "Statistics event type is not detected.";
36+
public static final String TAGGING_IS_NOT_SUPPORTED = "Tagging is not supported by the OCR engine.";
3637

3738
private PdfOcrExceptionMessageConstant() {
3839
//Private constructor will prevent the instantiation of this class directly

pdfocr-api/src/main/java/com/itextpdf/pdfocr/exceptions/PdfOcrInputException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/logs/PdfOcrLogMessageConstant.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/statistics/PdfOcrOutputType.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/statistics/PdfOcrOutputTypeStatisticsEvent.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2024 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr.structuretree;
24+
25+
/**
26+
* This class represents artifact structure tree item. Attaching such item to the text info means that
27+
* the text will be marked as artifact.
28+
*/
29+
public final class ArtifactItem extends LogicalStructureTreeItem {
30+
private final static ArtifactItem ARTIFACT_INSTANCE = new ArtifactItem();
31+
32+
private ArtifactItem() {
33+
super();
34+
}
35+
36+
/**
37+
* Retrieve an instance of {@link ArtifactItem}.
38+
*
39+
* @return an instance of {@link ArtifactItem}.
40+
*/
41+
public static ArtifactItem getInstance() {
42+
return ARTIFACT_INSTANCE;
43+
}
44+
}

0 commit comments

Comments
 (0)