Skip to content

Commit 8d2c19d

Browse files
committed
[RELEASE] iText 7 pdfOcr - 1.0.2
https://git.itextsupport.com/ * release/1.0.2: [RELEASE] 1.0.2-SNAPSHOT -> 1.0.2 Drop revapi plugin in favor of japicmp plugin thai_03 test fails in .NET. Might be related to reading UTF-8 files issue Combine HOCR and TXT outputs for more precise text recognition Deprecate unused log message constant Stabilize test on different Tesseract versions Add possibility to set image preprocessing properties Tesseract does not respect image rotation when doing OCR Remove redundant MethodSignature subclass Use tesseract executable from path instead of tesseractDir in tests If path to tessdata contains non ASCII characters, code unexpectedly fails TextInfo: move from List<Float> to Rectangle Use generalized Jenkinsfile in the pipeline-library Deprecate Tesseract4LogMessageConstant#CANNOT_CONVERT_IMAGE_TO_GRAYSCALE Non-Ascii characters support for the output file Use ImageTypeDetector from io module to detect image types Use new SystemUtil#runProcessAndWait overload from 7.1.12-SNAPSHOT accepting working directory Only run on Jenkins nodes that have label tesseract [RELEASE] Update dependency versions
2 parents e413823 + 1efcf7c commit 8d2c19d

File tree

64 files changed

+2321
-874
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2321
-874
lines changed

Jenkinsfile

Lines changed: 3 additions & 294 deletions
Original file line numberDiff line numberDiff line change
@@ -1,298 +1,7 @@
11
#!/usr/bin/env groovy
22
@Library('pipeline-library')_
33

4-
def schedule, sonarBranchName, sonarBranchTarget
5-
switch (env.BRANCH_NAME) {
6-
case ~/.*master.*/:
7-
schedule = '@monthly'
8-
sonarBranchName = '-Dsonar.branch.name=master'
9-
sonarBranchTarget = ''
10-
break
11-
case ~/.*develop.*/:
12-
schedule = '@midnight'
13-
sonarBranchName = '-Dsonar.branch.name=develop'
14-
sonarBranchTarget = '-Dsonar.branch.target=master'
15-
break
16-
default:
17-
schedule = ''
18-
sonarBranchName = '-Dsonar.branch.name=' + env.BRANCH_NAME
19-
sonarBranchTarget = '-Dsonar.branch.target=develop'
20-
break
21-
}
4+
def repoName = "pdfOcr"
5+
def dependencyRegex = "itextcore"
226

23-
pipeline {
24-
25-
agent { label '!master' }
26-
27-
environment {
28-
JDK_VERSION = 'jdk-8-oracle'
29-
tesseractDir = tool name: 'Tesseract', type: 'com.cloudbees.jenkins.plugins.customtools.CustomTool'
30-
}
31-
32-
options {
33-
ansiColor('xterm')
34-
buildDiscarder logRotator(artifactNumToKeepStr: '1')
35-
parallelsAlwaysFailFast()
36-
skipStagesAfterUnstable()
37-
timeout time: 1, unit: 'HOURS'
38-
timestamps()
39-
}
40-
41-
triggers {
42-
cron(schedule)
43-
}
44-
45-
tools {
46-
maven 'M3'
47-
jdk "${JDK_VERSION}"
48-
}
49-
50-
stages {
51-
stage('Abort possible previous builds') {
52-
steps {
53-
script {
54-
abortPreviousBuilds()
55-
}
56-
}
57-
}
58-
stage('Wait for blocking jobs') {
59-
steps {
60-
script {
61-
properties[[
62-
$class : 'BuildBlockerProperty',
63-
blockLevel : 'GLOBAL',
64-
blockingJobs : "^iText_7_Java/itextcore/$env.JOB_BASE_NAME\$",
65-
scanQueueFor : 'ALL',
66-
useBuildBlocker: true
67-
]]
68-
}
69-
}
70-
}
71-
stage('Build') {
72-
options {
73-
retry(2)
74-
}
75-
stages {
76-
stage('Clean workspace') {
77-
options {
78-
timeout time: 5, unit: 'MINUTES'
79-
}
80-
steps {
81-
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
82-
sh 'mvn --threads 2C --no-transfer-progress clean dependency:purge-local-repository ' +
83-
'-Dinclude=com.itextpdf -DresolutionFuzziness=groupId -DreResolve=false ' +
84-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
85-
}
86-
script {
87-
try {sh "rm -rf ${env.WORKSPACE.replace('\\','/')}/downloads"} catch (Exception ignored) {}
88-
}
89-
}
90-
}
91-
stage('Install branch dependencies') {
92-
options {
93-
timeout time: 5, unit: 'MINUTES'
94-
}
95-
when {
96-
not {
97-
anyOf {
98-
branch "master"
99-
branch "develop"
100-
}
101-
}
102-
}
103-
steps {
104-
script {
105-
getAndConfigureJFrogCLI()
106-
sh "./jfrog rt dl branch-artifacts/${env.JOB_BASE_NAME}/**/java/ downloads/"
107-
if (fileExists("downloads")) {
108-
dir ("downloads") {
109-
def mainPomFiles = findFiles glob: '**/main.pom'
110-
mainPomFiles.each { pomFile ->
111-
pomPath = pomFile.path.replace "\\", "/"
112-
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
113-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
114-
"-Dpackaging=pom -Dfile=${pomPath} -DpomFile=${pomPath}"
115-
}
116-
def pomFiles = findFiles glob: '**/*.pom'
117-
pomFiles.each { pomFile ->
118-
if (pomFile.name != "main.pom") {
119-
pomPath = pomFile.path.replace "\\", "/"
120-
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
121-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
122-
"-Dpackaging=pom -Dfile=${pomPath} -DpomFile=${pomPath}"
123-
}
124-
}
125-
def jarFiles = findFiles glob: '**/*.jar'
126-
jarFiles.each { jarFile ->
127-
jarPath = jarFile.path.replace "\\", "/"
128-
sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
129-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
130-
"-Dfile=${jarPath}"
131-
}
132-
}
133-
}
134-
}
135-
}
136-
}
137-
stage('Compile') {
138-
options {
139-
timeout time: 10, unit: 'MINUTES'
140-
}
141-
steps {
142-
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
143-
sh 'mvn --threads 2C --no-transfer-progress package -Dmaven.test.skip=true ' +
144-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
145-
}
146-
}
147-
}
148-
}
149-
post {
150-
failure {
151-
sleep time: 2, unit: 'MINUTES'
152-
}
153-
success {
154-
script { currentBuild.result = 'SUCCESS' }
155-
}
156-
}
157-
}
158-
stage('Static Code Analysis') {
159-
options {
160-
timeout time: 1, unit: 'HOURS'
161-
}
162-
steps {
163-
withMaven(jdk: "${JDK_VERSION}", maven: 'M3', mavenLocalRepo: '.repository') {
164-
sh 'mvn --no-transfer-progress verify --activate-profiles qa ' +
165-
'-Dpmd.analysisCache=true ' +
166-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
167-
}
168-
recordIssues(tools: [
169-
checkStyle(),
170-
pmdParser(),
171-
spotBugs(useRankAsPriority: true)
172-
])
173-
dependencyCheckPublisher pattern: 'target/dependency-check-report.xml'
174-
}
175-
}
176-
stage('Run Tests') {
177-
options {
178-
timeout time: 30, unit: 'MINUTES'
179-
}
180-
steps {
181-
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
182-
withSonarQubeEnv('Sonar') {
183-
sh 'mvn --no-transfer-progress --activate-profiles test ' +
184-
'-DgsExec="${gsExec}" -DcompareExec="${compareExec}" ' +
185-
'-DtesseractDir="${tesseractDir}" ' +
186-
'-Dmaven.main.skip=true -Dmaven.test.failure.ignore=false ' +
187-
'org.jacoco:jacoco-maven-plugin:prepare-agent verify org.jacoco:jacoco-maven-plugin:report ' +
188-
'-Dsonar.java.spotbugs.reportPaths="target/spotbugs.xml" ' +
189-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
190-
'sonar:sonar ' + sonarBranchName + ' ' + sonarBranchTarget
191-
}
192-
}
193-
}
194-
}
195-
stage("Quality Gate") {
196-
options {
197-
timeout time: 1, unit: 'HOURS'
198-
}
199-
steps {
200-
waitForQualityGate abortPipeline: true
201-
}
202-
}
203-
stage('Artifactory Deploy') {
204-
options {
205-
timeout time: 5, unit: 'MINUTES'
206-
}
207-
when {
208-
anyOf {
209-
branch "master"
210-
branch "develop"
211-
}
212-
}
213-
steps {
214-
withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
215-
script {
216-
def server = Artifactory.server 'itext-artifactory'
217-
def rtMaven = Artifactory.newMavenBuild()
218-
rtMaven.deployer server: server, releaseRepo: 'releases', snapshotRepo: 'snapshot'
219-
rtMaven.tool = 'M3'
220-
def buildInfo = rtMaven.run pom: 'pom.xml', goals: '--threads 2C --no-transfer-progress install --activate-profiles artifactory ' +
221-
"-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository".toString()
222-
server.publishBuildInfo buildInfo
223-
}
224-
}
225-
}
226-
}
227-
stage('Branch Artifactory Deploy') {
228-
options {
229-
timeout time: 5, unit: 'MINUTES'
230-
}
231-
when {
232-
not {
233-
anyOf {
234-
branch "master"
235-
branch "develop"
236-
}
237-
}
238-
}
239-
steps {
240-
script {
241-
if (env.GIT_URL) {
242-
repoName = ("${env.GIT_URL}" =~ /(.*\/)(.*)(\.git)/)[ 0 ][ 2 ]
243-
findFiles(glob: '*/target/*.jar').each { item ->
244-
if (!(item ==~ /.*\/[fs]b-contrib-.*?.jar/) && !(item ==~ /.*\/findsecbugs-plugin-.*?.jar/) && !(item ==~ /.*-sources.jar/) && !(item ==~ /.*-javadoc.jar/)) {
245-
sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/ --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
246-
}
247-
}
248-
findFiles(glob: '**/pom.xml').each { item ->
249-
def pomPath = item.path.replace('\\', '/')
250-
if (!(pomPath ==~ /.*target.*/)) {
251-
def resPomName = "main.pom"
252-
def subDirMatcher = (pomPath =~ /^.*(?<=\/|^)(.*)\/pom\.xml/)
253-
if (subDirMatcher.matches()) {
254-
resPomName = "${subDirMatcher[0][1]}.pom"
255-
}
256-
sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/${resPomName} --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
257-
}
258-
}
259-
}
260-
}
261-
}
262-
}
263-
}
264-
265-
post {
266-
always {
267-
echo 'One way or another, I have finished \uD83E\uDD16'
268-
}
269-
success {
270-
echo 'I succeeeded! \u263A'
271-
cleanWs deleteDirs: true
272-
}
273-
unstable {
274-
echo 'I am unstable \uD83D\uDE2E'
275-
}
276-
failure {
277-
echo 'I failed \uD83D\uDCA9'
278-
}
279-
changed {
280-
echo 'Things were different before... \uD83E\uDD14'
281-
}
282-
fixed {
283-
script {
284-
if (env.BRANCH_NAME.contains('master') || env.BRANCH_NAME.contains('develop')) {
285-
slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - Back to normal"
286-
}
287-
}
288-
}
289-
regression {
290-
script {
291-
if (env.BRANCH_NAME.contains('master') || env.BRANCH_NAME.contains('develop')) {
292-
slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - First failure"
293-
}
294-
}
295-
}
296-
}
297-
298-
}
7+
automaticJavaBuild(repoName, dependencyRegex)

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>1.0.1</version>
8+
<version>1.0.2</version>
99
</parent>
1010

1111
<artifactId>pdfocr-api</artifactId>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2020 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfocr;
24+
25+
import com.itextpdf.io.image.ImageData;
26+
27+
/**
28+
* Rotation information may be stored in image metadata.
29+
* For OCR and adding image to document that rotation
30+
* should be applied to the image, so that it is actually rotated,
31+
* not via metadata properties.
32+
* Interface is responsible for extracting rotation from metadata
33+
* and applying in to the image.
34+
*/
35+
public interface IImageRotationHandler {
36+
37+
/**
38+
* Apply rotation to image data.
39+
* If image is not rotated - does nothing.
40+
* @param imageData to apply rotation to
41+
* @return rotated image if rotation flag is set or self if no rotation
42+
*/
43+
public abstract ImageData applyRotation(ImageData imageData);
44+
45+
}

0 commit comments

Comments
 (0)