@@ -22,7 +22,6 @@ This file is part of the iText (R) project.
22
22
*/
23
23
package com .itextpdf .pdfocr .tessdata ;
24
24
25
- import com .itextpdf .io .util .MessageFormatUtil ;
26
25
import com .itextpdf .kernel .colors .DeviceCmyk ;
27
26
import com .itextpdf .kernel .pdf .PdfWriter ;
28
27
import com .itextpdf .kernel .utils .CompareTool ;
@@ -32,14 +31,11 @@ This file is part of the iText (R) project.
32
31
import com .itextpdf .pdfocr .PdfOcrLogMessageConstant ;
33
32
import com .itextpdf .pdfocr .tesseract4 .AbstractTesseract4OcrEngine ;
34
33
import com .itextpdf .pdfocr .tesseract4 .Tesseract4OcrEngineProperties ;
35
- import com .itextpdf .pdfocr .tesseract4 .Tesseract4OcrException ;
36
34
import com .itextpdf .pdfocr .tesseract4 .TextPositioning ;
37
35
import com .itextpdf .test .annotations .LogMessage ;
38
36
import com .itextpdf .test .annotations .LogMessages ;
39
37
40
38
import java .io .File ;
41
- import java .io .FileInputStream ;
42
- import java .io .FileNotFoundException ;
43
39
import java .io .IOException ;
44
40
import java .nio .file .Files ;
45
41
import java .util .ArrayList ;
@@ -627,97 +623,6 @@ public void testJapaneseScript() {
627
623
Assert .assertEquals (expected , result );
628
624
}
629
625
630
- @ Test
631
- public void testCustomUserWords () {
632
- String imgPath = TEST_IMAGES_DIRECTORY + "wierdwords.png" ;
633
- List <String > userWords = Arrays .<String >asList ("he23llo" , "qwetyrtyqpwe-rty" );
634
-
635
- Tesseract4OcrEngineProperties properties =
636
- tesseractReader .getTesseract4OcrEngineProperties ();
637
- properties .setLanguages (Arrays .asList ("fra" ));
638
- properties .setUserWords ("fra" , userWords );
639
- tesseractReader .setTesseract4OcrEngineProperties (properties );
640
- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
641
- Assert .assertTrue (result .contains (userWords .get (0 ))
642
- || result .contains (userWords .get (1 )));
643
-
644
- Assert .assertTrue (tesseractReader .getTesseract4OcrEngineProperties ()
645
- .getPathToUserWordsFile ().endsWith (".user-words" ));
646
- }
647
-
648
- @ Test
649
- public void testCustomUserWordsWithListOfLanguages () {
650
- String imgPath = TEST_IMAGES_DIRECTORY + "bogusText.jpg" ;
651
- String expectedOutput = "B1adeb1ab1a" ;
652
-
653
- Tesseract4OcrEngineProperties properties =
654
- tesseractReader .getTesseract4OcrEngineProperties ();
655
- properties .setLanguages (Arrays .asList ("fra" , "eng" ));
656
- properties .setUserWords ("eng" , Arrays .<String >asList ("b1adeb1ab1a" ));
657
- tesseractReader .setTesseract4OcrEngineProperties (properties );
658
-
659
- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
660
- result = result .replace ("\n " , "" ).replace ("\f " , "" );
661
- result = result .replaceAll ("[^\\ u0009\\ u000A\\ u000D\\ u0020-\\ u007E]" , "" );
662
- Assert .assertTrue (result .startsWith (expectedOutput ));
663
-
664
- Assert .assertTrue (tesseractReader .getTesseract4OcrEngineProperties ()
665
- .getPathToUserWordsFile ().endsWith (".user-words" ));
666
- }
667
-
668
- @ Test
669
- public void testUserWordsWithLanguageNotInList () throws FileNotFoundException {
670
- junitExpectedException .expect (Tesseract4OcrException .class );
671
- junitExpectedException .expectMessage (MessageFormatUtil
672
- .format (Tesseract4OcrException .LANGUAGE_IS_NOT_IN_THE_LIST ,
673
- "spa" ));
674
- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
675
- Tesseract4OcrEngineProperties properties =
676
- tesseractReader .getTesseract4OcrEngineProperties ();
677
- properties .setUserWords ("spa" , new FileInputStream (userWords ));
678
- properties .setLanguages (new ArrayList <String >());
679
- }
680
-
681
- @ Test
682
- public void testIncorrectLanguageForUserWordsAsList () {
683
- junitExpectedException .expect (Tesseract4OcrException .class );
684
- junitExpectedException .expectMessage (MessageFormatUtil
685
- .format (Tesseract4OcrException .LANGUAGE_IS_NOT_IN_THE_LIST ,
686
- "eng1" ));
687
- Tesseract4OcrEngineProperties properties =
688
- tesseractReader .getTesseract4OcrEngineProperties ();
689
- properties .setUserWords ("eng1" , Arrays .<String >asList ("word1" , "word2" ));
690
- properties .setLanguages (new ArrayList <String >());
691
- }
692
-
693
- @ Test
694
- public void testUserWordsWithDefaultLanguageNotInList ()
695
- throws FileNotFoundException {
696
- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
697
- Tesseract4OcrEngineProperties properties =
698
- tesseractReader .getTesseract4OcrEngineProperties ();
699
- properties .setUserWords ("eng" , new FileInputStream (userWords ));
700
- properties .setLanguages (new ArrayList <String >());
701
- tesseractReader .setTesseract4OcrEngineProperties (properties );
702
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg" ;
703
- String expectedOutput = "619121" ;
704
- String result = getRecognizedTextFromTextFile (tesseractReader , imgPath );
705
- Assert .assertTrue (result .startsWith (expectedOutput ));
706
- }
707
-
708
- @ Test
709
- public void testUserWordsFileNotDeleted () {
710
- String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt" ;
711
- Tesseract4OcrEngineProperties properties =
712
- tesseractReader .getTesseract4OcrEngineProperties ();
713
- properties .setPathToUserWordsFile (userWords );
714
- properties .setLanguages (Arrays .<String >asList ("eng" ));
715
- tesseractReader .setTesseract4OcrEngineProperties (properties );
716
- String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg" ;
717
- tesseractReader .doImageOcr (new File (imgPath ));
718
- Assert .assertTrue (new File (userWords ).exists ());
719
- }
720
-
721
626
/**
722
627
* Do OCR for given image and compare result text file with expected one.
723
628
*/
0 commit comments