Skip to content

Commit 7657d96

Browse files
committed
[RELEASE] iText pdfOCR 3.0.2
2 parents eead8fd + e6b9ac2 commit 7657d96

File tree

146 files changed

+1217
-190
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

146 files changed

+1217
-190
lines changed

doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
3232
# title of most generated pages and in a few other places.
3333
# The default value is: My Project.
3434

35-
PROJECT_NAME = "pdfOCR 3.0.1 API"
35+
PROJECT_NAME = "pdfOCR 3.0.2 API"
3636

3737
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
3838
# could be handy for archiving the generated documentation or if some version

itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
[assembly: AssemblyConfiguration("")]
88
[assembly: AssemblyCompany("Apryse Group NV")]
99
[assembly: AssemblyProduct("iText")]
10-
[assembly: AssemblyCopyright ("Copyright (c) 1998-2023 Apryse Group NV")]
10+
[assembly: AssemblyCopyright("Copyright (c) 1998-2024 Apryse Group NV")]
1111
[assembly: AssemblyTrademark("")]
1212
[assembly: AssemblyCulture("")]
1313

1414
[assembly: ComVisible(false)]
1515

1616
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
1717

18-
[assembly: AssemblyVersion("3.0.1.0")]
19-
[assembly: AssemblyFileVersion("3.0.1.0")]
20-
[assembly: AssemblyInformationalVersion("3.0.1")]
18+
[assembly: AssemblyVersion("3.0.2.0")]
19+
[assembly: AssemblyFileVersion("3.0.2.0")]
20+
[assembly: AssemblyInformationalVersion("3.0.2")]

itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@
2525
</ItemGroup>
2626
<ItemGroup>
2727
<ProjectReference Include="..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
28-
<PackageReference Include="itext.pdftest" Version="8.0.2" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
28+
<PackageReference Include="itext.pdftest" Version="8.0.3" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.csproj')" />
2929
<ProjectReference Include="..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
30-
<PackageReference Include="itext.bouncy-castle-adapter" Version="8.0.2" Condition="!Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
30+
<PackageReference Include="itext.bouncy-castle-adapter" Version="8.0.3" Condition="!Exists('..\..\..\itextcore\itext\itext.bouncy-castle-adapter\itext.bouncy-castle-adapter.csproj')" />
3131
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.0" />
3232
<PackageReference Include="NUnit" Version="3.12.0" />
3333
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -30,7 +30,9 @@ You should have received a copy of the GNU Affero General Public License
3030
using iText.Kernel.Font;
3131
using iText.Kernel.Geom;
3232
using iText.Kernel.Pdf;
33+
using iText.Kernel.Utils;
3334
using iText.Pdfa;
35+
using iText.Pdfocr.Exceptions;
3436
using iText.Pdfocr.Helpers;
3537
using iText.Pdfocr.Logs;
3638
using iText.Test;
@@ -193,6 +195,36 @@ public virtual void TestImageRotationHandlerForTiff() {
193195
;
194196
}
195197

198+
/// <summary>
/// Creates a tagged PDF through <see cref="TestStructureDetectionOcrEngine"/> using a 5x6 grid
/// description and compares the output with the reference file
/// <c>cmp_tableStructureTree.pdf</c>.
/// </summary>
[NUnit.Framework.Test]
public virtual void TestTableStructureTree() {
    String targetDirectory = PdfHelper.GetTargetDirectory();
    String pdfPath = targetDirectory + "tableStructureTree.pdf";
    // Image doesn't really matter here
    String input = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg";

    // Tagged output with red text, produced by the structure-aware test engine.
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
    creatorProperties.SetTextColor(DeviceRgb.RED);
    creatorProperties.SetTagged(true);
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new TestStructureDetectionOcrEngine(), creatorProperties);

    // rowCount, columnCount, cellWidth, cellHeight, startX, startY
    TestProcessProperties processProperties = new TestProcessProperties(5, 6, 50, 15, 100, 200);

    using (PdfWriter pdfWriter = PdfHelper.GetPdfWriter(pdfPath)) {
        var images = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(input));
        pdfCreator.CreatePdf(images, pdfWriter, new DocumentProperties(), processProperties).Close();
    }

    // CompareByContent is expected to yield null when the documents match.
    var comparisonResult = new CompareTool().CompareByContent(pdfPath,
        PdfHelper.TEST_DIRECTORY + "cmp_tableStructureTree.pdf", PdfHelper.GetTargetDirectory(), "diff_");
    NUnit.Framework.Assert.IsNull(comparisonResult);
}
216+
217+
/// <summary>
/// Requests tagged output through <c>PdfHelper.CreatePdf</c> and expects a
/// <see cref="PdfOcrException"/> whose message is CANNOT_CREATE_PDF_DOCUMENT formatted with
/// TAGGING_IS_NOT_SUPPORTED.
/// </summary>
[NUnit.Framework.Test]
[LogMessage(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT, LogLevel = LogLevelConstants.ERROR)]
public virtual void TestTaggingNotSupported() {
    String input = PdfHelper.GetImagesTestDirectory() + "numbers_01.jpg";
    String pdfPath = PdfHelper.GetTargetDirectory() + "taggingNotSupported.pdf";

    // NOTE(review): presumably the engine used by PdfHelper.CreatePdf does not support tagging - confirm.
    Exception e = NUnit.Framework.Assert.Catch(typeof(PdfOcrException), () => {
        PdfHelper.CreatePdf(pdfPath, new FileInfo(input), new OcrPdfCreatorProperties().SetTagged(true));
    });

    String expectedMessage = MessageFormatUtil.Format(PdfOcrExceptionMessageConstant.CANNOT_CREATE_PDF_DOCUMENT,
        PdfOcrExceptionMessageConstant.TAGGING_IS_NOT_SUPPORTED);
    NUnit.Framework.Assert.AreEqual(expectedMessage, e.Message);
}
227+
196228
internal class NotImplementedImageRotationHandler : IImageRotationHandler {
197229
public virtual ImageData ApplyRotation(ImageData imageData) {
198230
throw new Exception("applyRotation is not implemented");

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrPdfCreatorEventHelperTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/OcrProcessContextTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfA3uTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfCreatorUtilTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfFontTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfInputImageTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfLayersTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/PdfOcrMetaInfoContainerTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ScaleModeTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/exceptions/PdfOcrExceptionTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomOcrEngine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/CustomProductAwareOcrEngine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/ExtractionStrategy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/PdfHelper.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestProcessProperties.cs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2024 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
using iText.Pdfocr;
24+
25+
namespace iText.Pdfocr.Helpers {
26+
/// <summary>
/// Test-only <see cref="IOcrProcessProperties"/> implementation that describes a rectangular grid
/// of cells: how many rows and columns it has, the size of one cell and the coordinates at which
/// the grid starts.
/// </summary>
/// <remarks>
/// All values are fixed at construction time, so the backing fields are read-only.
/// </remarks>
public class TestProcessProperties : IOcrProcessProperties {
    private readonly float cellWidth;

    private readonly float cellHeight;

    private readonly float startX;

    private readonly float startY;

    private readonly int rowCount;

    private readonly int columnCount;

    /// <summary>Creates a grid description.</summary>
    /// <param name="rowCount">number of rows in the grid</param>
    /// <param name="columnCount">number of columns in the grid</param>
    /// <param name="cellWidth">width of a single cell</param>
    /// <param name="cellHeight">height of a single cell</param>
    /// <param name="startX">x coordinate at which the grid starts</param>
    /// <param name="startY">y coordinate at which the grid starts</param>
    public TestProcessProperties(int rowCount, int columnCount, float cellWidth, float cellHeight, float startX
        , float startY) {
        this.rowCount = rowCount;
        this.columnCount = columnCount;
        this.cellWidth = cellWidth;
        this.cellHeight = cellHeight;
        this.startX = startX;
        this.startY = startY;
    }

    /// <returns>the number of rows passed to the constructor</returns>
    public virtual int GetRowCount() {
        return rowCount;
    }

    /// <returns>the number of columns passed to the constructor</returns>
    public virtual int GetColumnCount() {
        return columnCount;
    }

    /// <returns>the cell width passed to the constructor</returns>
    public virtual float GetCellWidth() {
        return cellWidth;
    }

    /// <returns>the cell height passed to the constructor</returns>
    public virtual float GetCellHeight() {
        return cellHeight;
    }

    /// <returns>the starting x coordinate passed to the constructor</returns>
    public virtual float GetStartX() {
        return startX;
    }

    /// <returns>the starting y coordinate passed to the constructor</returns>
    public virtual float GetStartY() {
        return startY;
    }
}
73+
}

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/helpers/TestStructureDetectionOcrEngine.cs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2024 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
using System.Collections.Generic;
24+
using System.IO;
25+
using iText.Kernel.Geom;
26+
using iText.Pdfocr;
27+
using iText.Pdfocr.Structuretree;
28+
29+
namespace iText.Pdfocr.Helpers {
30+
/// <summary>
/// Stub <see cref="IOcrEngine"/> for tests. It does not read the input image; it fabricates a grid
/// of <see cref="TextInfo"/> items from the <see cref="TestProcessProperties"/> found in the
/// process context and attaches logical structure items to them.
/// </summary>
public class TestStructureDetectionOcrEngine : IOcrEngine {
    public TestStructureDetectionOcrEngine() {
    }

    /// <summary>Context-less overload; this stub always returns <c>null</c> here.</summary>
    public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input) {
        return null;
    }

    /// <summary>
    /// Generates one <see cref="TextInfo"/> per grid cell, row by row. Items of the first row are
    /// marked as artifacts; every other item gets a span nested in a paragraph, a table cell and
    /// a table row of a single <see cref="TableTreeItem"/>.
    /// </summary>
    /// <param name="input">ignored by this stub</param>
    /// <param name="ocrProcessContext">
    /// context whose process properties must be a <see cref="TestProcessProperties"/> (direct cast)
    /// </param>
    /// <returns>a dictionary holding all generated items under key 1</returns>
    public virtual IDictionary<int, IList<TextInfo>> DoImageOcr(FileInfo input, OcrProcessContext ocrProcessContext
        ) {
        TestProcessProperties grid = (TestProcessProperties)ocrProcessContext.GetOcrProcessProperties();
        IList<TextInfo> generatedItems = new List<TextInfo>();
        TableTreeItem table = new TableTreeItem();
        float cellWidth = grid.GetCellWidth();
        float cellHeight = grid.GetCellHeight();
        float startX = grid.GetStartX();
        float startY = grid.GetStartY();

        float y = startY;
        for (int rowIndex = 0; rowIndex < grid.GetRowCount(); ++rowIndex) {
            // The 1st row is emitted as artifacts and therefore gets no table row.
            bool isArtifactRow = rowIndex == 0;
            TableRowTreeItem row = null;
            if (!isArtifactRow) {
                row = new TableRowTreeItem();
                table.AddRow(row);
            }

            // Each row starts again at the left edge and advances one cell width per column.
            float x = startX;
            for (int columnIndex = 0; columnIndex < grid.GetColumnCount(); ++columnIndex) {
                TextInfo item = new TextInfo(rowIndex + " " + columnIndex, new Rectangle(x, y, cellWidth, cellHeight
                    ));
                if (isArtifactRow) {
                    item.SetLogicalStructureTreeItem(ArtifactItem.GetInstance());
                }
                else {
                    item.SetLogicalStructureTreeItem(AppendSpanCell(row));
                }
                generatedItems.Add(item);
                x += cellWidth;
            }

            // The next row is placed one cell height lower in y.
            y -= cellHeight;
        }

        // NOTE(review): key 1 is presumably the page number - confirm against IOcrEngine's contract.
        IDictionary<int, IList<TextInfo>> pages = new Dictionary<int, IList<TextInfo>>();
        pages.Put(1, generatedItems);
        return pages;
    }

    /// <summary>No-op in this stub.</summary>
    public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile) {
    }

    /// <summary>No-op in this stub.</summary>
    public virtual void CreateTxtFile(IList<FileInfo> inputImages, FileInfo txtFile, OcrProcessContext ocrProcessContext
        ) {
    }

    /// <summary>
    /// Adds a new cell to <paramref name="row"/> containing a paragraph that contains a span, and
    /// returns that span.
    /// </summary>
    private static SpanTreeItem AppendSpanCell(TableRowTreeItem row) {
        TableCellTreeItem cell = new TableCellTreeItem();
        row.AddCell(cell);
        ParagraphTreeItem paragraph = new ParagraphTreeItem();
        cell.AddChild(paragraph);
        SpanTreeItem span = new SpanTreeItem();
        paragraph.AddChild(span);
        return span;
    }
}
89+
}

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsAggregatorTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/statistics/PdfOcrOutputTypeStatisticsEventTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2023 Apryse Group NV
3+
Copyright (c) 1998-2024 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

0 commit comments

Comments
 (0)