Skip to content

Commit 6480e74

Browse files
committed
Merge branch 'release_branch_DEVSIX-5576' into master-rc
2 parents 20eb242 + b1293d6 commit 6480e74

File tree

100 files changed

+3249
-1757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+3249
-1757
lines changed

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@
44
def repoName = "pdfOcr"
55
def dependencyRegex = "itextcore"
66
def solutionFile = "i7n-ocr.sln"
7-
def csprojFramework = "netcoreapp2.0"
7+
def csprojFramework = "net461"
88

99
automaticDotnetBuild(repoName, dependencyRegex, solutionFile, csprojFramework)

doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
3232
# title of most generated pages and in a few other places.
3333
# The default value is: My Project.
3434

35-
PROJECT_NAME = "pdfOCR 1.0.3 API"
35+
PROJECT_NAME = "pdfOCR 2.0.0 API"
3636

3737
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
3838
# could be handy for archiving the generated documentation or if some version

itext.tests/itext.pdfocr.api.tests/Properties/AssemblyInfo.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@
1515

1616
[assembly: Guid("d6a6ea97-1f23-448f-b700-eff62971d234")]
1717

18-
[assembly: AssemblyVersion("1.0.3.0")]
19-
[assembly: AssemblyFileVersion("1.0.3.0")]
20-
[assembly: AssemblyInformationalVersion("1.0.3")]
18+
[assembly: AssemblyVersion("2.0.0.0")]
19+
[assembly: AssemblyFileVersion("2.0.0.0")]
20+
[assembly: AssemblyInformationalVersion("2.0.0")]

itext.tests/itext.pdfocr.api.tests/itext.pdfocr.api.tests.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<OutputType>library</OutputType>
1010
</PropertyGroup>
1111
<PropertyGroup>
12-
<TargetFramework>net45</TargetFramework>
12+
<TargetFramework>net461</TargetFramework>
1313
</PropertyGroup>
1414
<PropertyGroup>
1515
<SignAssembly>true</SignAssembly>
@@ -25,7 +25,7 @@
2525
</ItemGroup>
2626
<ItemGroup>
2727
<ProjectReference Include="..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj" Condition="Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
28-
<PackageReference Include="itext7.pdftest" Version="7.1.16" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
28+
<PackageReference Include="itext7.pdftest" Version="7.2.0" Condition="!Exists('..\..\..\itextcore\itext\itext.pdftest\itext.pdftest.netstandard.csproj')" />
2929
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.6.0" />
3030
<PackageReference Include="NUnit" Version="3.12.0" />
3131
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">

itext.tests/itext.pdfocr.api.tests/itext/pdfocr/ApiTest.cs

Lines changed: 102 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,44 +23,129 @@ You should have received a copy of the GNU Affero General Public License
2323
using System;
2424
using System.Collections.Generic;
2525
using System.IO;
26+
using iText.Commons.Actions.Contexts;
27+
using iText.Commons.Utils;
2628
using iText.IO.Image;
27-
using iText.IO.Util;
2829
using iText.Kernel.Colors;
2930
using iText.Kernel.Font;
3031
using iText.Kernel.Geom;
32+
using iText.Kernel.Pdf;
33+
using iText.Pdfa;
3134
using iText.Pdfocr.Helpers;
35+
using iText.Pdfocr.Logs;
3236
using iText.Test;
3337
using iText.Test.Attributes;
3438

3539
namespace iText.Pdfocr {
3640
public class ApiTest : ExtendedITextTest {
41+
// Output directory for the PDF files written by the tests in this class.
// The value ends with a separator because the tests build their output paths as
// DESTINATION_FOLDER + "<name>.pdf"; without it the files would be created beside the
// folder (named "pdfocr<name>.pdf") instead of inside the directory handed to
// CreateOrClearDestinationFolder.
public static readonly String DESTINATION_FOLDER = NUnit.Framework.TestContext.CurrentContext.TestDirectory
42+
+ "/test/itext/pdfocr/";
43+
44+
// One-time fixture setup: hands DESTINATION_FOLDER to CreateOrClearDestinationFolder
// so the output directory is prepared before the tests of this class run.
[NUnit.Framework.OneTimeSetUp]
45+
public static void BeforeClass() {
46+
CreateOrClearDestinationFolder(DESTINATION_FOLDER);
47+
}
48+
3749
[NUnit.Framework.Test]
38-
public virtual void TestTextInfo() {
39-
String path = PdfHelper.GetDefaultImagePath();
40-
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path));
41-
NUnit.Framework.Assert.AreEqual(1, result.Count);
42-
TextInfo textInfo = new TextInfo();
43-
textInfo.SetText("text");
44-
textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
45-
int page = 2;
46-
result.Put(page, JavaCollectionsUtil.SingletonList<TextInfo>(textInfo));
47-
NUnit.Framework.Assert.AreEqual(2, result.Count);
48-
NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
50+
// Calls OcrPdfCreator.CreatePdf with the default test image, the writer returned by
// PdfHelper.GetPdfWriter() and DocumentProperties carrying a DummyMetaInfo, then asserts
// that the content bytes of page 1 (decoded as UTF-8) contain "<00190014001c001400150014>".
public virtual void CreatePdfWithFileTest() {
51+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
52+
OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
53+
// The returned PdfDocument is disposed by the using statement once the assertion has run.
using (PdfDocument pdf = pdfCreator.CreatePdf(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper
54+
.GetDefaultImagePath())), PdfHelper.GetPdfWriter(), new DocumentProperties().SetEventCountingMetaInfo(
55+
new ApiTest.DummyMetaInfo()))) {
56+
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
57+
.UTF8);
58+
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
59+
}
60+
}
61+
62+
// Calls OcrPdfCreator.CreatePdfFile to write the default test image to an output file,
// reopens that file with PdfReader and asserts that the content bytes of page 1
// (decoded as UTF-8) contain "<00190014001c001400150014>".
[NUnit.Framework.Test]
63+
public virtual void CreatePdfFileWithFileTest() {
64+
// The output path is DESTINATION_FOLDER with the file name appended directly.
String output = DESTINATION_FOLDER + "createPdfFileWithFileTest.pdf";
65+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
66+
OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
67+
pdfCreator.CreatePdfFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath
68+
())), new FileInfo(output));
69+
using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
70+
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
71+
.UTF8);
72+
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
73+
}
74+
}
75+
76+
// Calls OcrPdfCreator.CreatePdfA with the default test image, the "en-US" PDF language and
// the output intent from PdfHelper.GetRGBPdfOutputIntent(), then asserts that page 1 contains
// "<00190014001c001400150014>" and that the returned document is a PdfADocument.
[NUnit.Framework.Test]
77+
public virtual void CreatePdfAWithFileTest() {
78+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo()).SetPdfLang
79+
("en-US");
80+
OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
81+
using (PdfDocument pdf = pdfCreator.CreatePdfA(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper
82+
.GetDefaultImagePath())), PdfHelper.GetPdfWriter(), new DocumentProperties().SetEventCountingMetaInfo(
83+
new ApiTest.DummyMetaInfo()), PdfHelper.GetRGBPdfOutputIntent())) {
84+
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
85+
.UTF8);
86+
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
87+
// The declared type is PdfDocument; this checks the runtime type produced by CreatePdfA.
NUnit.Framework.Assert.IsTrue(pdf is PdfADocument);
88+
}
4989
}
5090

5191
[NUnit.Framework.Test]
52-
public virtual void TestTextInfoDeprecationMode() {
92+
// Calls OcrPdfCreator.CreatePdfAFile (properties carry a DummyMetaInfo and the "en-US" language),
// reopens the written file and asserts that page 1 contains "<00190014001c001400150014>" and
// that the reader reports the conformance and part of PdfAConformanceLevel.PDF_A_3U.
public virtual void CreatePdfAFileWithFileTest() {
93+
// The output path is DESTINATION_FOLDER with the file name appended directly.
String output = DESTINATION_FOLDER + "createPdfAFileWithFileTest.pdf";
94+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo()).SetPdfLang
95+
("en-US");
96+
OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
97+
pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath
98+
())), new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
99+
using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
100+
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
101+
.UTF8);
102+
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
103+
// Conformance level is read back from the reader of the reopened file.
PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
104+
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
105+
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
106+
}
107+
}
108+
109+
// Same flow as the other CreatePdfAFile test, except that no meta info is set on the
// OcrPdfCreatorProperties (only the "en-US" language). Asserts the page 1 content string
// and the PDF_A_3U conformance and part read back from the written file.
[NUnit.Framework.Test]
110+
public virtual void CreatePdfAFileWithFileNoMetaTest() {
111+
// The output path is DESTINATION_FOLDER with the file name appended directly.
String output = DESTINATION_FOLDER + "createPdfAFileWithFileNoMetaTest.pdf";
112+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
113+
OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
114+
pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath
115+
())), new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
116+
using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
117+
String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(), System.Text.Encoding
118+
.UTF8);
119+
NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
120+
PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
121+
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
122+
NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
123+
}
124+
}
125+
126+
// Runs OcrPdfCreator.CreatePdfAFile with a CustomProductAwareOcrEngine and asserts that the
// engine reports IsGetMetaInfoContainerTriggered() == true afterwards. The written file
// itself is not inspected here.
[NUnit.Framework.Test]
127+
public virtual void CreatePdfAFileWithFileProductAwareEngineTest() {
128+
// The output path is DESTINATION_FOLDER with the file name appended directly.
String output = DESTINATION_FOLDER + "createPdfAFileWithFileProductAwareEngineTest.pdf";
129+
OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
130+
// Kept in a local so the flag can be queried after the creator has used the engine.
CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine();
131+
OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, props);
132+
pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath
133+
())), new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
134+
NUnit.Framework.Assert.IsTrue(ocrEngine.IsGetMetaInfoContainerTriggered());
135+
}
136+
137+
// Runs CustomOcrEngine.DoImageOcr on the default test image and expects a single entry in the
// returned dictionary. It then stores a hand-built TextInfo under page key 2 and checks that
// the dictionary holds two entries and that the stored text can be read back.
[NUnit.Framework.Test]
138+
public virtual void TestTextInfo() {
53139
String path = PdfHelper.GetDefaultImagePath();
54-
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine(true).DoImageOcr(new FileInfo(path));
140+
IDictionary<int, IList<TextInfo>> result = new CustomOcrEngine().DoImageOcr(new FileInfo(path));
55141
NUnit.Framework.Assert.AreEqual(1, result.Count);
56142
TextInfo textInfo = new TextInfo();
57143
textInfo.SetText("text");
58-
textInfo.SetBbox(JavaUtil.ArraysAsList(204.0f, 158.0f, 742.0f, 294.0f));
144+
textInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
59145
// Page key 2 is a second key next to the single entry asserted above (Count goes from 1 to 2).
int page = 2;
60146
result.Put(page, JavaCollectionsUtil.SingletonList<TextInfo>(textInfo));
61147
NUnit.Framework.Assert.AreEqual(2, result.Count);
62148
NUnit.Framework.Assert.AreEqual(textInfo.GetText(), result.Get(page)[0].GetText());
63-
NUnit.Framework.Assert.AreEqual(textInfo.GetBbox().Count, result.Get(page)[0].GetBbox().Count);
64149
}
65150

66151
[LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
@@ -113,18 +198,7 @@ public virtual ImageData ApplyRotation(ImageData imageData) {
113198
}
114199
}
115200

116-
[LogMessage(PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, Count = 7)]
117-
[NUnit.Framework.Test]
118-
public virtual void TestThaiImageWithNotDefGlyphsDeprecationMode() {
119-
String testName = "testThaiImageWithNotdefGlyphs";
120-
String path = PdfHelper.GetThaiImagePath();
121-
String pdfPath = PdfHelper.GetTargetDirectory() + testName + ".pdf";
122-
PdfHelper.CreatePdf(pdfPath, new FileInfo(path), new OcrPdfCreatorProperties().SetTextColor(DeviceRgb.BLACK
123-
), true);
124-
ExtractionStrategy strategy = PdfHelper.GetExtractionStrategy(pdfPath);
125-
PdfFont font = strategy.GetPdfFont();
126-
String fontName = font.GetFontProgram().GetFontNames().GetFontName();
127-
NUnit.Framework.Assert.IsTrue(fontName.Contains("LiberationSans"));
201+
// Empty IMetaInfo implementation; the tests in this class pass instances of it to
// SetMetaInfo and SetEventCountingMetaInfo.
private class DummyMetaInfo : IMetaInfo {
128202
}
129203
}
130204
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2021 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
using System;
24+
using System.Collections.Generic;
25+
using iText.Commons.Actions;
26+
using iText.Commons.Actions.Confirmations;
27+
using iText.Commons.Actions.Contexts;
28+
using iText.Commons.Actions.Data;
29+
using iText.Commons.Actions.Sequence;
30+
using iText.Commons.Utils;
31+
using iText.Kernel.Actions.Data;
32+
using iText.Pdfocr.Statistics;
33+
using iText.Test;
34+
35+
namespace iText.Pdfocr {
36+
// Tests for OcrPdfCreatorEventHelper.OnEvent. Each test registers a StoreEventsHandler with
// the EventManager, passes one event to the helper and checks whether the handler recorded it.
public class OcrPdfCreatorEventHelperTest : ExtendedITextTest {
37+
// Product data shared by the events built in the tests below.
private static readonly ProductData DUMMY_PRODUCT_DATA = new ProductData("test-product", "inner_product",
38+
"1.0.0", 1900, 2100);
39+
40+
// Handler registered in Before() and unregistered in After(); collects the events it receives.
private OcrPdfCreatorEventHelperTest.StoreEventsHandler storeEventsHandler;
41+
42+
// Per-test setup: creates a fresh handler and registers it with the EventManager instance.
[NUnit.Framework.SetUp]
43+
public virtual void Before() {
44+
storeEventsHandler = new OcrPdfCreatorEventHelperTest.StoreEventsHandler();
45+
EventManager.GetInstance().Register(storeEventsHandler);
46+
}
47+
48+
// Per-test teardown: unregisters the handler and drops the reference.
[NUnit.Framework.TearDown]
49+
public virtual void After() {
50+
EventManager.GetInstance().Unregister(storeEventsHandler);
51+
storeEventsHandler = null;
52+
}
53+
54+
// Expects a DummyITextEvent passed to the helper to be recorded once by the handler.
[NUnit.Framework.Test]
55+
public virtual void ProductContextBasedEventTest() {
56+
OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo
57+
());
58+
OcrPdfCreatorEventHelperTest.DummyITextEvent @event = new OcrPdfCreatorEventHelperTest.DummyITextEvent();
59+
helper.OnEvent(@event);
60+
NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
61+
NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
62+
}
63+
64+
// Expects a PdfOcrOutputTypeStatisticsEvent passed to the helper NOT to reach the handler
// (the recorded event count stays 0).
[NUnit.Framework.Test]
65+
public virtual void PdfOcrStatisticsEventTest() {
66+
OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo
67+
());
68+
PdfOcrOutputTypeStatisticsEvent e = new PdfOcrOutputTypeStatisticsEvent(PdfOcrOutputType.PDF, DUMMY_PRODUCT_DATA
69+
);
70+
helper.OnEvent(e);
71+
NUnit.Framework.Assert.AreEqual(0, storeEventsHandler.GetEvents().Count);
72+
}
73+
74+
// Expects a CustomProductITextEvent passed to the helper to be recorded once by the handler.
[NUnit.Framework.Test]
75+
public virtual void CustomProductEventTest() {
76+
OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo
77+
());
78+
AbstractProductITextEvent @event = new OcrPdfCreatorEventHelperTest.CustomProductITextEvent(DUMMY_PRODUCT_DATA
79+
);
80+
helper.OnEvent(@event);
81+
NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
82+
NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
83+
}
84+
85+
// Expects a CustomStatisticsEvent (declared below) passed to the helper to be recorded once,
// in contrast to the PdfOcrOutputTypeStatisticsEvent case above.
[NUnit.Framework.Test]
86+
public virtual void CustomStatisticsEventTest() {
87+
OcrPdfCreatorEventHelper helper = new OcrPdfCreatorEventHelper(new SequenceId(), new OcrPdfCreatorEventHelperTest.DummyMetaInfo
88+
());
89+
OcrPdfCreatorEventHelperTest.CustomStatisticsEvent @event = new OcrPdfCreatorEventHelperTest.CustomStatisticsEvent
90+
(DUMMY_PRODUCT_DATA);
91+
helper.OnEvent(@event);
92+
NUnit.Framework.Assert.AreEqual(1, storeEventsHandler.GetEvents().Count);
93+
NUnit.Framework.Assert.AreEqual(@event, storeEventsHandler.GetEvents()[0]);
94+
}
95+
96+
// Empty IMetaInfo implementation handed to the OcrPdfCreatorEventHelper constructor.
private class DummyMetaInfo : IMetaInfo {
97+
}
98+
99+
// Process event built with ITextCoreProductData, a null second base argument and
// ON_DEMAND confirmation; reports the event type "test-event".
private class DummyITextEvent : AbstractProductProcessITextEvent {
100+
protected internal DummyITextEvent()
101+
: base(ITextCoreProductData.GetInstance(), null, EventConfirmationType.ON_DEMAND) {
102+
}
103+
104+
public override String GetEventType() {
105+
return "test-event";
106+
}
107+
}
108+
109+
// Product event that only forwards the given ProductData to its base class.
private class CustomProductITextEvent : AbstractProductITextEvent {
110+
protected internal CustomProductITextEvent(ProductData productData)
111+
: base(productData) {
112+
}
113+
}
114+
115+
// Statistics event that forwards the given ProductData to its base class and reports
// the single statistics name "custom-statistics".
private class CustomStatisticsEvent : AbstractStatisticsEvent {
116+
protected internal CustomStatisticsEvent(ProductData productData)
117+
: base(productData) {
118+
}
119+
120+
public override IList<String> GetStatisticsNames() {
121+
return JavaCollectionsUtil.SingletonList("custom-statistics");
122+
}
123+
}
124+
125+
// IEventHandler that appends every received event to a list exposed through GetEvents().
private class StoreEventsHandler : IEventHandler {
126+
private IList<IEvent> events = new List<IEvent>();
127+
128+
// Returns the backing list itself (not a copy).
public virtual IList<IEvent> GetEvents() {
129+
return events;
130+
}
131+
132+
public virtual void OnEvent(IEvent @event) {
133+
events.Add(@event);
134+
}
135+
}
136+
}
137+
}

0 commit comments

Comments
 (0)