Skip to content

Commit 923548d

Browse files
committed
Merge branch 'release_branch_DEVSIX-5576' into master-rc
2 parents d143306 + fb9aa93 commit 923548d

File tree

84 files changed

+3568
-1872
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+3568
-1872
lines changed

pdfocr-api/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>1.0.3</version>
8+
<version>2.0.0</version>
99
</parent>
1010

1111
<artifactId>pdfocr-api</artifactId>
@@ -47,4 +47,4 @@
4747
</resource>
4848
</resources>
4949
</build>
50-
</project>
50+
</project>
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2021 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr;
24+
25+
import com.itextpdf.commons.actions.AbstractITextEvent;
26+
import com.itextpdf.commons.actions.AbstractProductITextEvent;
27+
import com.itextpdf.commons.actions.confirmations.EventConfirmationType;
28+
import com.itextpdf.commons.actions.sequence.SequenceId;
29+
30+
/**
31+
* Abstract helper for working with events: subclasses provide event handling, the sequence id and the event confirmation type. This class is for internal usage.
32+
*/
33+
public abstract class AbstractPdfOcrEventHelper extends AbstractITextEvent {
34+
35+
/**
36+
* Handles the given product event.
37+
*
38+
* @param event the product event to handle
39+
*/
40+
public abstract void onEvent(AbstractProductITextEvent event);
41+
42+
/**
43+
* Returns the sequence id.
44+
*
45+
* @return the sequence id
46+
*/
47+
public abstract SequenceId getSequenceId();
48+
49+
/**
50+
* Returns the confirmation type of the event.
51+
*
52+
* @return the event confirmation type
53+
*/
54+
public abstract EventConfirmationType getConfirmationType();
55+
}

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,21 @@ public interface IOcrEngine {
4848
*/
4949
Map<Integer, List<TextInfo>> doImageOcr(File input);
5050

51+
/**
52+
* Reads data from the provided input image file and returns the retrieved data
53+
* in the format described below.
54+
*
55+
* @param input input image {@link java.io.File}
56+
* @param ocrProcessContext OCR processing context
57+
*
58+
* @return {@link java.util.Map} where the key is a {@link java.lang.Integer}
59+
* representing the number of the page and the value is a
60+
* {@link java.util.List} of {@link TextInfo} elements where each
61+
* {@link TextInfo} element contains a word or a line and its 4
62+
* coordinates (bbox)
63+
*/
64+
Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext);
65+
5166
/**
5267
* Performs OCR using provided {@link IOcrEngine} for the given list of
5368
* input images and saves output to a text file using provided path.
@@ -58,4 +73,16 @@ public interface IOcrEngine {
5873
* @param txtFile file to be created
5974
*/
6075
void createTxtFile(List<File> inputImages, File txtFile);
76+
77+
/**
78+
* Performs OCR using provided {@link IOcrEngine} for the given list of
79+
* input images and saves output to a text file using provided path.
80+
* Note that human reading order is not guaranteed
81+
* due to possible specifics of input images (multi-column layout, tables, etc.).
82+
*
83+
* @param inputImages {@link java.util.List} of images to be OCRed
84+
* @param txtFile file to be created
85+
* @param ocrProcessContext OCR processing context
86+
*/
87+
void createTxtFile(List<File> inputImages, File txtFile, OcrProcessContext ocrProcessContext);
6188
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2021 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr;
24+
25+
import com.itextpdf.commons.actions.data.ProductData;
26+
27+
/**
28+
* Interface for objects that expose product data and a meta info container.
29+
*/
30+
public interface IProductAware {
31+
32+
/**
33+
* Gets the container with meta info.
34+
*
35+
* @return the held meta info container
36+
*/
37+
PdfOcrMetaInfoContainer getMetaInfoContainer();
38+
39+
/**
40+
* Gets the object containing information about the product.
41+
*
42+
* @return the product data
43+
*/
44+
ProductData getProductData();
45+
}

0 commit comments

Comments
 (0)