Skip to content

Commit 21378e9

Browse files
author
dmitry.radchuk
committed
Added input-related exceptions in pdfOcr
DEVSIX-5742. Autoported commit. Original commit hash: [2934e70]. Manual files: pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtil.java. Failed to automatically remove the sharp analogs of the following files: pdfocr-api/src/main/java/com/itextpdf/pdfocr/exceptions/OcrException.java, pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/exceptions/Tesseract4OcrException.java
1 parent 8d017c8 commit 21378e9

32 files changed

+487
-142
lines changed

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ public virtual void TestPdfCustomMetadata() {
106106
pdfDocument.Close();
107107
}
108108

109-
[LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
109+
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
110110
[NUnit.Framework.Test]
111111
public virtual void TestNonCompliantThaiPdfA() {
112112
NUnit.Framework.Assert.That(() => {
@@ -119,7 +119,7 @@ public virtual void TestNonCompliantThaiPdfA() {
119119
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), ocrPdfCreatorProperties, PdfHelper.GetRGBPdfOutputIntent
120120
());
121121
}
122-
, NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))))
122+
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, MessageFormatUtil.Format(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611))))
123123
;
124124
}
125125

@@ -149,7 +149,7 @@ public virtual void TestCompliantThaiPdfA() {
149149
NUnit.Framework.Assert.IsTrue(font.IsEmbedded());
150150
}
151151

152-
[LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
152+
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
153153
[NUnit.Framework.Test]
154154
public virtual void TestPdfACreateWithoutPdfLangProperty() {
155155
NUnit.Framework.Assert.That(() => {
@@ -159,7 +159,7 @@ public virtual void TestPdfACreateWithoutPdfLangProperty() {
159159
PdfHelper.CreatePdfA(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties(), PdfHelper.GetRGBPdfOutputIntent
160160
());
161161
}
162-
, NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET)))
162+
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET)))
163163
;
164164
}
165165
}

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public virtual void GetImageDataFromNotExistingImageTest() {
7272
NUnit.Framework.Assert.That(() => {
7373
PdfCreatorUtil.GetImageData(new FileInfo("no such path"), null);
7474
}
75-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
75+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
7676
;
7777
}
7878

@@ -82,7 +82,7 @@ public virtual void GetImageDataFromInvalidImageTest() {
8282
NUnit.Framework.Assert.That(() => {
8383
PdfCreatorUtil.GetImageData(new FileInfo(PdfHelper.GetImagesTestDirectory() + "corrupted.jpg"), null);
8484
}
85-
, NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_READ_INPUT_IMAGE)))
85+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_READ_INPUT_IMAGE)))
8686
;
8787
}
8888
}

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public virtual void TestFontColor() {
5353
}
5454

5555
[LogMessage(PdfOcrLogMessageConstant.PROVIDED_FONT_PROVIDER_IS_INVALID, Count = 1)]
56-
[LogMessage(OcrException.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
56+
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, Count = 1)]
5757
[NUnit.Framework.Test]
5858
public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
5959
NUnit.Framework.Assert.That(() => {
@@ -71,7 +71,7 @@ public virtual void TestInvalidFontWithInvalidDefaultFontFamily() {
7171
NUnit.Framework.Assert.AreEqual(PdfHelper.DEFAULT_TEXT, result);
7272
NUnit.Framework.Assert.AreEqual(ScaleMode.SCALE_TO_FIT, properties.GetScaleMode());
7373
}
74-
, NUnit.Framework.Throws.InstanceOf<OcrException>().With.Message.EqualTo(MessageFormatUtil.Format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, OcrException.CANNOT_RESOLVE_PROVIDED_FONTS)))
74+
, NUnit.Framework.Throws.InstanceOf<PdfOcrException>().With.Message.EqualTo(MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, PdfOcrExceptionMessageConstant.CANNOT_RESOLVE_PROVIDED_FONTS)))
7575
;
7676
}
7777

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public virtual void TestCorruptedImage() {
3939
NUnit.Framework.Assert.IsNotNull(realOutput);
4040
NUnit.Framework.Assert.AreEqual("", realOutput);
4141
}
42-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
42+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
4343
;
4444
}
4545

@@ -52,7 +52,7 @@ public virtual void TestCorruptedImageWithoutExtension() {
5252
NUnit.Framework.Assert.IsNotNull(realOutput);
5353
NUnit.Framework.Assert.AreEqual("", realOutput);
5454
}
55-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
55+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
5656
;
5757
}
5858

@@ -65,7 +65,7 @@ public virtual void TestInvalidImagePathWithoutDot() {
6565
NUnit.Framework.Assert.IsNotNull(realOutput);
6666
NUnit.Framework.Assert.AreEqual("", realOutput);
6767
}
68-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
68+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
6969
;
7070
}
7171

@@ -78,7 +78,7 @@ public virtual void TestInvalidImagePathWithDot() {
7878
NUnit.Framework.Assert.IsNotNull(realOutput);
7979
NUnit.Framework.Assert.AreEqual("", realOutput);
8080
}
81-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
81+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
8282
;
8383
}
8484

@@ -91,7 +91,7 @@ public virtual void TestValidImageWithoutExtension() {
9191
NUnit.Framework.Assert.IsNotNull(realOutput);
9292
NUnit.Framework.Assert.AreEqual("", realOutput);
9393
}
94-
, NUnit.Framework.Throws.InstanceOf<OcrException>())
94+
, NUnit.Framework.Throws.InstanceOf<PdfOcrInputException>())
9595
;
9696
}
9797
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2021 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
using System;
24+
using iText.Test;
25+
26+
namespace iText.Pdfocr.Exceptions {
27+
// Unit tests for the constructors and message-parameter handling of
// PdfOcrException and PdfOcrInputException.
public class PdfOcrExceptionTest : ExtendedITextTest {
28+
// A PdfOcrException built from a cause must expose that cause as InnerException.
[NUnit.Framework.Test]
29+
public virtual void OcrExceptionThrowableConstructorTest() {
30+
Exception cause = new System.IO.IOException();
31+
PdfOcrException exception = new PdfOcrException(cause);
32+
NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
33+
}
34+
35+
// Same check for PdfOcrInputException; the instance is held through a
// PdfOcrException-typed variable.
[NUnit.Framework.Test]
36+
public virtual void OcrInputExceptionThrowableConstructorTest() {
37+
Exception cause = new System.IO.IOException();
38+
PdfOcrException exception = new PdfOcrInputException(cause);
39+
NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
40+
}
41+
42+
// A PdfOcrInputException built from a string must report that string as its Message.
[NUnit.Framework.Test]
43+
public virtual void OcrInputExceptionStringConstructorTest() {
44+
String message = "test message";
45+
PdfOcrException exception = new PdfOcrInputException(message);
46+
NUnit.Framework.Assert.AreEqual(message, exception.Message);
47+
}
48+
49+
// After SetMessageParams is called, Message is expected to be the template
// with its {0} placeholder replaced by the supplied parameter.
[NUnit.Framework.Test]
50+
public virtual void OcrExceptiongetMessageParamsTest() {
51+
String message = "test message {0}";
52+
String param = "param";
53+
String expectedMessage = "test message param";
54+
// NOTE(review): despite the test name, the object under test is a PdfOcrInputException.
PdfOcrException exception = new PdfOcrInputException(message);
55+
exception.SetMessageParams(param);
56+
NUnit.Framework.Assert.AreEqual(expectedMessage, exception.Message);
57+
}
58+
}
59+
}

itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/TesseractExecutableIntegrationTest.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ You should have received a copy of the GNU Affero General Public License
2828

2929
namespace iText.Pdfocr {
3030
public class TesseractExecutableIntegrationTest : IntegrationTestHelper {
31-
[LogMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
31+
[LogMessage(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
3232
[NUnit.Framework.Test]
3333
public virtual void TestNullPathToTesseractExecutable() {
3434
NUnit.Framework.Assert.That(() => {
@@ -38,31 +38,31 @@ public virtual void TestNullPathToTesseractExecutable() {
3838
tesseractExecutableReader.SetPathToExecutable(null);
3939
GetTextFromPdf(tesseractExecutableReader, file);
4040
}
41-
, NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
41+
, NUnit.Framework.Throws.InstanceOf<PdfOcrTesseract4Exception>().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
4242
;
4343
}
4444

45-
[LogMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
45+
[LogMessage(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, Count = 1)]
4646
[NUnit.Framework.Test]
4747
public virtual void TestEmptyPathToTesseractExecutable() {
4848
NUnit.Framework.Assert.That(() => {
4949
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
5050
GetTextFromPdf(new Tesseract4ExecutableOcrEngine("", new Tesseract4OcrEngineProperties()), file);
5151
}
52-
, NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
52+
, NUnit.Framework.Throws.InstanceOf<PdfOcrTesseract4Exception>().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE))
5353
;
5454
}
5555

5656
[LogMessage(Tesseract4LogMessageConstant.COMMAND_FAILED, Count = 1)]
57-
[LogMessage(Tesseract4OcrException.TESSERACT_NOT_FOUND, Count = 1)]
57+
[LogMessage(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_NOT_FOUND, Count = 1)]
5858
[NUnit.Framework.Test]
5959
public virtual void TestIncorrectPathToTesseractExecutable() {
6060
NUnit.Framework.Assert.That(() => {
6161
FileInfo file = new FileInfo(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
6262
GetTextFromPdf(new Tesseract4ExecutableOcrEngine("path\\to\\executable\\", new Tesseract4OcrEngineProperties
6363
()), file);
6464
}
65-
, NUnit.Framework.Throws.InstanceOf<Tesseract4OcrException>().With.Message.EqualTo(Tesseract4OcrException.TESSERACT_NOT_FOUND))
65+
, NUnit.Framework.Throws.InstanceOf<PdfOcrTesseract4Exception>().With.Message.EqualTo(PdfOcrTesseract4ExceptionMessageConstant.TESSERACT_NOT_FOUND))
6666
;
6767
}
6868
}

itext.tests/itext.pdfocr.tesseract4.tests/itext/pdfocr/actions/Tesseract4EventHandlingTest.cs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ public virtual void OcrPdfCreatorCreatePdfFileNoImageTest() {
6565
IList<FileInfo> images = JavaCollectionsUtil.SingletonList(imgFile);
6666
FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
6767
OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
68-
NUnit.Framework.Assert.Catch(typeof(OcrException), () => ocrPdfCreator.CreatePdfFile(images, outPdfFile));
68+
NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => ocrPdfCreator.CreatePdfFile(images, outPdfFile
69+
));
6970
// check ocr events
7071
NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
7172
}
@@ -157,7 +158,7 @@ public virtual void OcrPdfCreatorCreatePdfNoImageTest() {
157158
FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".pdf");
158159
PdfWriter pdfWriter = new PdfWriter(outPdfFile);
159160
OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
160-
NUnit.Framework.Assert.Catch(typeof(Tesseract4OcrException), () => ocrPdfCreator.CreatePdf(images, pdfWriter
161+
NUnit.Framework.Assert.Catch(typeof(PdfOcrTesseract4Exception), () => ocrPdfCreator.CreatePdf(images, pdfWriter
161162
));
162163
pdfWriter.Dispose();
163164
NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
@@ -228,7 +229,7 @@ public virtual void DoImageOcrTest() {
228229
[LogMessage(Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)]
229230
public virtual void DoImageOcrNoImageTest() {
230231
FileInfo imgFile = new FileInfo("uncknown");
231-
NUnit.Framework.Assert.Catch(typeof(OcrException), () => tesseractReader.DoImageOcr(imgFile));
232+
NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => tesseractReader.DoImageOcr(imgFile));
232233
NUnit.Framework.Assert.AreEqual(0, eventsHandler.GetEvents().Count);
233234
}
234235

@@ -280,8 +281,8 @@ public virtual void CreateTxtFileNoImageTest() {
280281
FileInfo imgFile = new FileInfo("no_image");
281282
IList<FileInfo> images = JavaUtil.ArraysAsList(imgFile, imgFile);
282283
FileInfo outPdfFile = FileUtil.CreateTempFile("test", ".txt");
283-
NUnit.Framework.Assert.Catch(typeof(OcrException), () => tesseractReader.CreateTxtFile(images, outPdfFile)
284-
);
284+
NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => tesseractReader.CreateTxtFile(images, outPdfFile
285+
));
285286
// only one usage event is expected and it is not confirmed (no confirm event)
286287
NUnit.Framework.Assert.AreEqual(1, eventsHandler.GetEvents().Count);
287288
ValidateUsageEvent(eventsHandler.GetEvents()[0], EventConfirmationType.ON_DEMAND);
@@ -292,9 +293,9 @@ public virtual void CreateTxtFileNoFileTest() {
292293
FileInfo imgFile = new FileInfo(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
293294
IList<FileInfo> images = JavaUtil.ArraysAsList(imgFile, imgFile);
294295
FileInfo outPdfFile = new FileInfo("nopath/nofile");
295-
Exception e = NUnit.Framework.Assert.Catch(typeof(Tesseract4OcrException), () => tesseractReader.CreateTxtFile
296+
Exception e = NUnit.Framework.Assert.Catch(typeof(PdfOcrTesseract4Exception), () => tesseractReader.CreateTxtFile
296297
(images, outPdfFile));
297-
NUnit.Framework.Assert.AreEqual(Tesseract4OcrException.CANNOT_WRITE_TO_FILE, e.Message);
298+
NUnit.Framework.Assert.AreEqual(PdfOcrTesseract4ExceptionMessageConstant.CANNOT_WRITE_TO_FILE, e.Message);
298299
NUnit.Framework.Assert.AreEqual(3, eventsHandler.GetEvents().Count);
299300
IEvent usageEvent = eventsHandler.GetEvents()[0];
300301
ValidateUsageEvent(usageEvent, EventConfirmationType.ON_DEMAND);
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2021 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
using System;
24+
using iText.Pdfocr.Tesseract4.Exceptions;
25+
using iText.Test;
26+
27+
namespace iText.Pdfocr.Exceptions {
28+
// Unit tests for the cause-taking constructors of PdfOcrTesseract4Exception
// and PdfOcrInputTesseract4Exception.
// NOTE(review): this test class is declared in iText.Pdfocr.Exceptions while the
// file imports iText.Pdfocr.Tesseract4.Exceptions — confirm the namespace is intended.
public class PdfOcrTesseract4ExceptionTest : ExtendedITextTest {
29+
// A PdfOcrTesseract4Exception built from a cause must expose that cause as InnerException.
[NUnit.Framework.Test]
30+
public virtual void Tesseract4PdfOcrExceptionThrowableConstructorTest() {
31+
Exception cause = new System.IO.IOException();
32+
PdfOcrTesseract4Exception exception = new PdfOcrTesseract4Exception(cause);
33+
NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
34+
}
35+
36+
// Same check for PdfOcrInputTesseract4Exception; the instance is held through a
// PdfOcrTesseract4Exception-typed variable.
[NUnit.Framework.Test]
37+
public virtual void Tesseract4PdfOcrInputExceptionThrowableConstructorTest() {
38+
Exception cause = new System.IO.IOException();
39+
PdfOcrTesseract4Exception exception = new PdfOcrInputTesseract4Exception(cause);
40+
NUnit.Framework.Assert.AreEqual(cause, exception.InnerException);
41+
}
42+
}
43+
}

0 commit comments

Comments
 (0)