@@ -23,44 +23,129 @@ You should have received a copy of the GNU Affero General Public License
23
23
using System ;
24
24
using System . Collections . Generic ;
25
25
using System . IO ;
26
+ using iText . Commons . Actions . Contexts ;
27
+ using iText . Commons . Utils ;
26
28
using iText . IO . Image ;
27
- using iText . IO . Util ;
28
29
using iText . Kernel . Colors ;
29
30
using iText . Kernel . Font ;
30
31
using iText . Kernel . Geom ;
32
+ using iText . Kernel . Pdf ;
33
+ using iText . Pdfa ;
31
34
using iText . Pdfocr . Helpers ;
35
+ using iText . Pdfocr . Logs ;
32
36
using iText . Test ;
33
37
using iText . Test . Attributes ;
34
38
35
39
namespace iText . Pdfocr {
36
40
public class ApiTest : ExtendedITextTest {
41
/// <summary>Folder that receives the PDF files written by the tests of this class.</summary>
/// <remarks>
/// The value ends with a directory separator on purpose: tests build their output
/// paths by appending a bare file name to this constant, and without the separator
/// the files would be created next to, rather than inside, the folder passed to
/// <c>CreateOrClearDestinationFolder</c>.
/// </remarks>
public static readonly String DESTINATION_FOLDER = NUnit.Framework.TestContext.CurrentContext.TestDirectory
    + "/test/itext/pdfocr/";
43
+
44
/// <summary>
/// One-time fixture setup: hands <see cref="DESTINATION_FOLDER"/> to
/// <c>CreateOrClearDestinationFolder</c> so the output location is ready for the tests.
/// </summary>
[NUnit.Framework.OneTimeSetUp]
public static void BeforeClass() {
    String outputFolder = DESTINATION_FOLDER;
    CreateOrClearDestinationFolder(outputFolder);
}
48
+
37
49
[ NUnit . Framework . Test ]
38
- public virtual void TestTextInfo ( ) {
39
- String path = PdfHelper . GetDefaultImagePath ( ) ;
40
- IDictionary < int , IList < TextInfo > > result = new CustomOcrEngine ( ) . DoImageOcr ( new FileInfo ( path ) ) ;
41
- NUnit . Framework . Assert . AreEqual ( 1 , result . Count ) ;
42
- TextInfo textInfo = new TextInfo ( ) ;
43
- textInfo . SetText ( "text" ) ;
44
- textInfo . SetBboxRect ( new Rectangle ( 204.0f , 158.0f , 538.0f , 136.0f ) ) ;
45
- int page = 2 ;
46
- result . Put ( page , JavaCollectionsUtil . SingletonList < TextInfo > ( textInfo ) ) ;
47
- NUnit . Framework . Assert . AreEqual ( 2 , result . Count ) ;
48
- NUnit . Framework . Assert . AreEqual ( textInfo . GetText ( ) , result . Get ( page ) [ 0 ] . GetText ( ) ) ;
50
// Creates an in-memory PDF from the default test image and verifies that the content
// stream of page 1 contains the expected hex string (presumably the encoded text
// produced by CustomOcrEngine -- confirm against that helper).
public virtual void CreatePdfWithFileTest() {
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
    // Single-element input list holding the default image.
    IList<FileInfo> images = JavaCollectionsUtil.SingletonList<FileInfo>(
        new FileInfo(PdfHelper.GetDefaultImagePath()));
    using (PdfDocument pdf = creator.CreatePdf(images, PdfHelper.GetPdfWriter(),
        new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()))) {
        byte[] pageBytes = pdf.GetPage(1).GetContentBytes();
        String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(pageBytes, System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(pageContent.Contains("<00190014001c001400150014>"));
    }
}
61
+
62
/// <summary>
/// Writes a PDF file from the default test image, reopens it and verifies that the
/// content stream of page 1 contains the expected hex string.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfFileWithFileTest() {
    // Path.Combine inserts the directory separator when DESTINATION_FOLDER lacks one, so
    // the file is created inside the destination folder instead of beside it.
    // Fully qualified because iText.Kernel.Geom also declares a type named Path.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output));
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
    }
}
75
+
76
/// <summary>
/// Creates an in-memory PDF/A document from the default test image and verifies both
/// the expected hex string on page 1 and that the result is a <c>PdfADocument</c>.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAWithFileTest() {
    OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties()
        .SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
    // Single-element input list holding the default image.
    IList<FileInfo> images = JavaCollectionsUtil.SingletonList<FileInfo>(
        new FileInfo(PdfHelper.GetDefaultImagePath()));
    using (PdfDocument pdf = creator.CreatePdfA(images, PdfHelper.GetPdfWriter(),
        new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo()),
        PdfHelper.GetRGBPdfOutputIntent())) {
        byte[] pageBytes = pdf.GetPage(1).GetContentBytes();
        String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(pageBytes, System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(pageContent.Contains("<00190014001c001400150014>"));
        NUnit.Framework.Assert.IsTrue(pdf is PdfADocument);
    }
}
50
90
51
91
[ NUnit . Framework . Test ]
52
- public virtual void TestTextInfoDeprecationMode ( ) {
92
// Writes a PDF/A file from the default test image (meta info set on the creator
// properties), reopens it and verifies the expected hex string on page 1 as well as
// the PDF/A-3U conformance level reported by the reader.
public virtual void CreatePdfAFileWithFileTest() {
    // Path.Combine inserts the directory separator when DESTINATION_FOLDER lacks one, so
    // the file is created inside the destination folder instead of beside it.
    // Fully qualified because iText.Kernel.Geom also declares a type named Path.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        // Fail with an assertion rather than a NullReferenceException when the written
        // file carries no conformance information at all.
        NUnit.Framework.Assert.IsNotNull(cl);
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}
108
+
109
/// <summary>
/// Same scenario as the PDF/A file test, but without setting meta info on the creator
/// properties: writes the file, reopens it and verifies the expected hex string on
/// page 1 and the PDF/A-3U conformance level.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileNoMetaTest() {
    // Path.Combine inserts the directory separator when DESTINATION_FOLDER lacks one, so
    // the file is created inside the destination folder instead of beside it.
    // Fully qualified because iText.Kernel.Geom also declares a type named Path.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileNoMetaTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(pdf.GetPage(1).GetContentBytes(),
            System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        // Fail with an assertion rather than a NullReferenceException when the written
        // file carries no conformance information at all.
        NUnit.Framework.Assert.IsNotNull(cl);
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}
125
+
126
/// <summary>
/// Writes a PDF/A file using a <c>CustomProductAwareOcrEngine</c> and verifies that
/// the engine reports its meta info container was requested during creation.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileProductAwareEngineTest() {
    // Path.Combine inserts the directory separator when DESTINATION_FOLDER lacks one, so
    // the file is created inside the destination folder instead of beside it.
    // Fully qualified because iText.Kernel.Geom also declares a type named Path.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileProductAwareEngineTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine();
    OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, props);
    pdfCreator.CreatePdfAFile(JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output), PdfHelper.GetRGBPdfOutputIntent());
    NUnit.Framework.Assert.IsTrue(ocrEngine.IsGetMetaInfoContainerTriggered());
}
136
+
137
/// <summary>
/// Runs <c>CustomOcrEngine</c> on the default image, checks that one page entry is
/// returned, then adds a hand-built <c>TextInfo</c> under a second page key and checks
/// that it can be read back from the result map.
/// </summary>
[NUnit.Framework.Test]
public virtual void TestTextInfo() {
    String imagePath = PdfHelper.GetDefaultImagePath();
    IDictionary<int, IList<TextInfo>> ocrResult = new CustomOcrEngine().DoImageOcr(new FileInfo(imagePath));
    NUnit.Framework.Assert.AreEqual(1, ocrResult.Count);

    // Extra entry stored under a page key the engine did not produce.
    int extraPage = 2;
    TextInfo extraInfo = new TextInfo();
    extraInfo.SetText("text");
    extraInfo.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
    ocrResult.Put(extraPage, JavaCollectionsUtil.SingletonList<TextInfo>(extraInfo));

    NUnit.Framework.Assert.AreEqual(2, ocrResult.Count);
    TextInfo storedInfo = ocrResult.Get(extraPage)[0];
    NUnit.Framework.Assert.AreEqual(extraInfo.GetText(), storedInfo.GetText());
}
65
150
66
151
[ LogMessage ( PdfOcrLogMessageConstant . COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER , Count = 7 ) ]
@@ -113,18 +198,7 @@ public virtual ImageData ApplyRotation(ImageData imageData) {
113
198
}
114
199
}
115
200
116
- [ LogMessage ( PdfOcrLogMessageConstant . COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER , Count = 7 ) ]
117
- [ NUnit . Framework . Test ]
118
- public virtual void TestThaiImageWithNotDefGlyphsDeprecationMode ( ) {
119
- String testName = "testThaiImageWithNotdefGlyphs" ;
120
- String path = PdfHelper . GetThaiImagePath ( ) ;
121
- String pdfPath = PdfHelper . GetTargetDirectory ( ) + testName + ".pdf" ;
122
- PdfHelper . CreatePdf ( pdfPath , new FileInfo ( path ) , new OcrPdfCreatorProperties ( ) . SetTextColor ( DeviceRgb . BLACK
123
- ) , true ) ;
124
- ExtractionStrategy strategy = PdfHelper . GetExtractionStrategy ( pdfPath ) ;
125
- PdfFont font = strategy . GetPdfFont ( ) ;
126
- String fontName = font . GetFontProgram ( ) . GetFontNames ( ) . GetFontName ( ) ;
127
- NUnit . Framework . Assert . IsTrue ( fontName . Contains ( "LiberationSans" ) ) ;
201
/// <summary>
/// Member-less <c>IMetaInfo</c> implementation; the tests in this class pass instances
/// of it wherever a meta info object is required.
/// </summary>
private class DummyMetaInfo : IMetaInfo
{
}
129
203
}
130
204
}
0 commit comments