Skip to content

Commit cabe77b

Browse files
committed
[RELEASE] iText pdfOCR 4.0.2
2 parents 7a1490f + cfad76c commit cabe77b

File tree

10 files changed

+115
-18
lines changed

10 files changed

+115
-18
lines changed

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.0.1</version>
8+
<version>4.0.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-api</artifactId>
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2025 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr;
24+
25+
import java.util.AbstractMap.SimpleImmutableEntry;
26+
import java.util.Collection;
27+
import java.util.Collections;
28+
import java.util.List;
29+
import sharpen.config.MappingConfiguration;
30+
import sharpen.config.MappingConfigurator;
31+
import sharpen.config.ModuleOption;
32+
import sharpen.config.ModulesConfigurator;
33+
import sharpen.config.OptionsConfigurator;
34+
35+
public class SharpenConfigMapping implements MappingConfiguration {
36+
37+
@Override
38+
public int getMappingPriority() {
39+
return 6;
40+
}
41+
42+
@Override
43+
public String getModuleName() {
44+
return "pdfocr-api";
45+
}
46+
47+
@Override
48+
public void applyMappingConfiguration(MappingConfigurator configurator) {
49+
configurator.mapStringLiteral("com.itextpdf.pdfocr.PdfOcrFontProvider.DEFAULT_FONT_PATH", "iText.Pdfocr.Api.font.LiberationSans-Regular.ttf");
50+
51+
// It's fine to use System.IO.StreamWriter in this mapping instead of the iText.Commons.Utils.FormattingStreamWriter
52+
// because java.io.FileWriter only writes characters, so there are no locale-dependent conversions.
53+
configurator.ignoreUsing("Java.IO");
54+
configurator.ignoreUsing("Java.Nio");
55+
// images
56+
configurator.addFullName("System.Drawing.Bitmap");
57+
configurator.ignoreUsing("Javax.Imageio");
58+
}
59+
60+
@Override
61+
public void applySharpenOptions(OptionsConfigurator configurator) {
62+
63+
}
64+
65+
@Override
66+
public void applyConfigModuleSettings(ModulesConfigurator configurator) {
67+
68+
}
69+
70+
@Override
71+
public Collection<ModuleOption> getAvailableModuleSettings() {
72+
return Collections.EMPTY_SET;
73+
}
74+
75+
@Override
76+
public Collection<String> getDependencies() {
77+
return Collections.EMPTY_LIST;
78+
}
79+
80+
@Override
81+
public Collection<String> getIgnoredSourceFiles() {
82+
return Collections.EMPTY_LIST;
83+
}
84+
85+
@Override
86+
public Collection<String> getIgnoredResources() {
87+
return Collections.EMPTY_LIST;
88+
}
89+
90+
@Override
91+
public List<SimpleImmutableEntry<String, String>> getOverwrittenResources() {
92+
return Collections.EMPTY_LIST;
93+
}
94+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
com.itextpdf.pdfocr.SharpenConfigMapping

pdfocr-tesseract4/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.0.1</version>
8+
<version>4.0.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-tesseract4</artifactId>

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ This file is part of the iText (R) project.
2727
import com.itextpdf.commons.actions.contexts.IMetaInfo;
2828
import com.itextpdf.commons.actions.data.ProductData;
2929
import com.itextpdf.commons.utils.MessageFormatUtil;
30+
import com.itextpdf.commons.utils.StringNormalizer;
3031
import com.itextpdf.io.image.ImageType;
3132
import com.itextpdf.pdfocr.AbstractPdfOcrEventHelper;
3233
import com.itextpdf.pdfocr.IOcrEngine;
@@ -315,7 +316,7 @@ public final String doImageOcr(final File input,
315316
* @return {@code true} if the current OS is Windows, {@code false} otherwise
316317
*/
317318
public boolean isWindows() {
318-
return identifyOsType().toLowerCase().contains("win");
319+
return StringNormalizer.toLowerCase(identifyOsType()).contains("win");
319320
}
320321

321322
/**
@@ -326,7 +327,7 @@ public boolean isWindows() {
326327
public String identifyOsType() {
327328
String os = System.getProperty("os.name") == null
328329
? System.getProperty("OS") : System.getProperty("os.name");
329-
return os.toLowerCase();
330+
return StringNormalizer.toLowerCase(os);
330331
}
331332

332333
/**

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ This file is part of the iText (R) project.
2525
import com.itextpdf.commons.actions.confirmations.ConfirmEvent;
2626
import com.itextpdf.commons.actions.confirmations.EventConfirmationType;
2727
import com.itextpdf.commons.utils.MessageFormatUtil;
28+
import com.itextpdf.commons.utils.StringNormalizer;
29+
import com.itextpdf.pdfocr.AbstractPdfOcrEventHelper;
30+
import com.itextpdf.pdfocr.tesseract4.actions.events.PdfOcrTesseract4ProductEvent;
31+
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4Exception;
32+
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4ExceptionMessageConstant;
33+
import com.itextpdf.pdfocr.tesseract4.logs.Tesseract4LogMessageConstant;
2834

2935
import java.awt.image.BufferedImage;
3036
import java.io.File;
@@ -35,13 +41,6 @@ This file is part of the iText (R) project.
3541
import java.util.Collections;
3642
import java.util.List;
3743
import java.util.UUID;
38-
39-
import com.itextpdf.pdfocr.AbstractPdfOcrEventHelper;
40-
import com.itextpdf.pdfocr.tesseract4.actions.events.PdfOcrTesseract4ProductEvent;
41-
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4Exception;
42-
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4ExceptionMessageConstant;
43-
import com.itextpdf.pdfocr.tesseract4.logs.Tesseract4LogMessageConstant;
44-
4544
import net.sourceforge.lept4j.Pix;
4645
import org.slf4j.LoggerFactory;
4746

@@ -478,7 +477,7 @@ private String getExtension(File inputImage) {
478477
String extension = new String(
479478
inputImage.getAbsolutePath().toCharArray(), index,
480479
inputImage.getAbsolutePath().length() - index);
481-
return extension.toLowerCase();
480+
return StringNormalizer.toLowerCase(extension);
482481
}
483482
}
484483
return ".png";

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrEngineProperties.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ This file is part of the iText (R) project.
2424

2525
import com.itextpdf.commons.utils.FileUtil;
2626
import com.itextpdf.commons.utils.MessageFormatUtil;
27+
import com.itextpdf.commons.utils.StringNormalizer;
2728
import com.itextpdf.pdfocr.IOcrEngine;
2829
import com.itextpdf.pdfocr.OcrEngineProperties;
2930
import com.itextpdf.pdfocr.tesseract4.exceptions.PdfOcrTesseract4Exception;
@@ -328,7 +329,7 @@ Tesseract4OcrEngineProperties setUserWords(final String language,
328329
final InputStream inputStream) throws PdfOcrTesseract4Exception {
329330
setPathToUserWordsFile(null);
330331
if (!getLanguages().contains(language)) {
331-
if (DEFAULT_LANGUAGE.equals(language.toLowerCase())) {
332+
if (DEFAULT_LANGUAGE.equals(StringNormalizer.toLowerCase(language))) {
332333
List<String> languagesList = getLanguages();
333334
languagesList.add(language);
334335
setLanguages(languagesList);

pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/actions/data/PdfOcrTesseract4ProductData.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ This file is part of the iText (R) project.
3030
public class PdfOcrTesseract4ProductData {
3131
private static final String PDF_OCR_TESSERACT4_PRODUCT_NAME = "pdfOcr-tesseract4";
3232
private static final String PDF_OCR_TESSERACT4_PUBLIC_PRODUCT_NAME = "pdfOCR-Tesseract4";
33-
private static final String PDF_OCR_VERSION = "4.0.1";
33+
private static final String PDF_OCR_VERSION = "4.0.2";
3434
private static final int PDF_OCR_COPYRIGHT_SINCE = 2000;
3535
private static final int PDF_OCR_COPYRIGHT_TO = 2025;
3636

pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfocr.imageformats;
2424

2525
import com.itextpdf.commons.utils.MessageFormatUtil;
26+
import com.itextpdf.commons.utils.StringNormalizer;
2627
import com.itextpdf.kernel.colors.DeviceCmyk;
2728
import com.itextpdf.kernel.utils.CompareTool;
2829
import com.itextpdf.pdfocr.IntegrationTestHelper;
@@ -49,7 +50,7 @@ public abstract class ImageFormatIntegrationTest extends IntegrationTestHelper {
4950

5051
public ImageFormatIntegrationTest(ReaderType type) {
5152
tesseractReader = getTesseractReader(type);
52-
this.testType = type.toString().toLowerCase();
53+
this.testType = StringNormalizer.toLowerCase(type.toString());
5354
}
5455

5556
@BeforeEach

pom.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>root</artifactId>
8-
<version>9.1.0</version>
8+
<version>9.2.0</version>
99
<relativePath />
1010
</parent>
1111

1212
<artifactId>pdfocr-root</artifactId>
13-
<version>4.0.1</version>
13+
<version>4.0.2</version>
1414
<packaging>pom</packaging>
1515

1616
<name>pdfOCR</name>
@@ -22,7 +22,7 @@
2222
</modules>
2323

2424
<properties>
25-
<itext.version>9.1.0</itext.version>
25+
<itext.version>9.2.0</itext.version>
2626
<java.version>1.8</java.version>
2727
<jdkLevel>${java.version}</jdkLevel>
2828
<maven.compiler.source>${java.version}</maven.compiler.source>

0 commit comments

Comments
 (0)