Skip to content

Commit aea7e82

Browse files
author
Eugene Bochilo
committed
Support Replacements and alternatives rules for UA-2
DEVSIX-9003
1 parent a921c5a commit aea7e82

File tree

12 files changed

+747
-8
lines changed

12 files changed

+747
-8
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/PdfCanvas.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ This file is part of the iText (R) project.
7272
import com.itextpdf.kernel.pdf.xobject.PdfXObject;
7373
import com.itextpdf.kernel.validation.context.CanvasBmcValidationContext;
7474
import com.itextpdf.kernel.validation.context.CanvasStackValidationContext;
75+
import com.itextpdf.kernel.validation.context.CanvasTextAdditionContext;
7576
import com.itextpdf.kernel.validation.context.CanvasWritingContentValidationContext;
7677
import com.itextpdf.kernel.validation.context.ExtendedGStateValidationContext;
7778
import com.itextpdf.kernel.validation.context.FillColorValidationContext;
@@ -743,6 +744,7 @@ public PdfCanvas showText(GlyphLine text, Iterator<GlyphLine.GlyphLinePart> iter
743744
KernelExceptionMessageConstant.FONT_AND_SIZE_MUST_BE_SET_BEFORE_WRITING_ANY_TEXT, currentGs);
744745
}
745746

747+
checkTextOnAddition(text);
746748
document.checkIsoConformance(new FontValidationContext(text.toString(), currentGs.getFont()));
747749

748750
final float fontSize = FontProgram.convertTextSpaceToGlyphSpace(currentGs.getFontSize());
@@ -930,13 +932,14 @@ public PdfCanvas showText(PdfArray textArray) {
930932
}
931933

932934
// Take text part to process
933-
StringBuilder text = new StringBuilder();
935+
StringBuilder decodedText = new StringBuilder();
934936
for (PdfObject obj : textArray) {
935937
if (obj instanceof PdfString) {
936-
text.append(obj);
938+
decodedText.append(currentGs.getFont().decode((PdfString) obj));
937939
}
938940
}
939-
document.checkIsoConformance(new FontValidationContext(text.toString(), currentGs.getFont()));
941+
checkTextOnAddition(decodedText.toString());
942+
document.checkIsoConformance(new FontValidationContext(decodedText.toString(), currentGs.getFont()));
940943

941944
contentStream.getOutputStream().writeBytes(ByteUtils.getIsoBytes("["));
942945
for (PdfObject obj : textArray) {
@@ -2508,6 +2511,7 @@ private void showTextInt(String text) {
25082511
KernelExceptionMessageConstant.FONT_AND_SIZE_MUST_BE_SET_BEFORE_WRITING_ANY_TEXT, currentGs);
25092512
}
25102513
this.checkIsoConformanceWritingOnContent();
2514+
checkTextOnAddition(text);
25112515
document.checkIsoConformance(new FontValidationContext(text, currentGs.getFont()));
25122516

25132517
currentGs.getFont().writeText(text, contentStream.getOutputStream());
@@ -2697,6 +2701,18 @@ private static double[] getEllipseRoundedRectPoints(double x, double y, double w
26972701
pt4[2], pt4[3], pt4[4], pt4[5], pt4[6], pt4[7]};
26982702
}
26992703

2704+
private void checkTextOnAddition(GlyphLine text) {
2705+
checkTextOnAddition(text.toString());
2706+
}
2707+
2708+
private void checkTextOnAddition(String text) {
2709+
PdfDictionary attributes = null;
2710+
if (!tagStructureStack.isEmpty()) {
2711+
attributes = tagStructureStack.peek().getSecond();
2712+
}
2713+
document.checkIsoConformance(new CanvasTextAdditionContext(text, attributes, contentStream));
2714+
}
2715+
27002716
/**
27012717
* This method is used to traverse parent tree and begin all layers in it.
27022718
* If layer was already begun during method call, it will not be processed again.

kernel/src/main/java/com/itextpdf/kernel/validation/ValidationType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,6 @@ public enum ValidationType {
4949
LAYOUT,
5050
DUPLICATE_ID_ENTRY,
5151
DESTINATION_ADDITION,
52-
ANNOTATION
52+
ANNOTATION,
53+
CANVAS_TEXT_ADDITION
5354
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2025 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.kernel.validation.context;
24+
25+
import com.itextpdf.kernel.pdf.PdfDictionary;
26+
import com.itextpdf.kernel.pdf.PdfName;
27+
import com.itextpdf.kernel.pdf.PdfNumber;
28+
import com.itextpdf.kernel.pdf.PdfStream;
29+
import com.itextpdf.kernel.validation.IValidationContext;
30+
import com.itextpdf.kernel.validation.ValidationType;
31+
32+
/**
33+
* Class which contains context in which text was added to canvas.
34+
*/
35+
public class CanvasTextAdditionContext implements IValidationContext {
36+
private final String text;
37+
private PdfNumber mcId;
38+
private final PdfDictionary attributes;
39+
private final PdfStream contentStream;
40+
41+
/**
42+
* Creates {@link CanvasTextAdditionContext} instance.
43+
*
44+
* @param text text which was added to canvas
45+
* @param attributes {@link PdfDictionary} attributes which correspond to this text
46+
* @param contentStream {@link PdfStream} in which text is written
47+
*/
48+
public CanvasTextAdditionContext(String text, PdfDictionary attributes, PdfStream contentStream) {
49+
this.text = text;
50+
this.attributes = attributes;
51+
this.contentStream = contentStream;
52+
if (attributes != null) {
53+
this.mcId = attributes.getAsNumber(PdfName.MCID);
54+
}
55+
}
56+
57+
/**
58+
* Gets text which was added to canvas.
59+
*
60+
* @return text which was added to canvas
61+
*/
62+
public String getText() {
63+
return text;
64+
}
65+
66+
/**
67+
* Gets {@link PdfNumber} which represents MCID of this text.
68+
*
69+
* @return {@link PdfNumber} which represents MCID of this text
70+
*/
71+
public PdfNumber getMcId() {
72+
return mcId;
73+
}
74+
75+
/**
76+
* Gets {@link PdfDictionary} attributes which correspond to the added text.
77+
*
78+
* @return {@link PdfDictionary} attributes which correspond to the added text
79+
*/
80+
public PdfDictionary getAttributes() {
81+
return attributes;
82+
}
83+
84+
/**
85+
* Returns {@link PdfStream} on which text is written.
86+
*
87+
* @return {@link PdfStream} on which text is written
88+
*/
89+
public PdfStream getContentStream() {
90+
return contentStream;
91+
}
92+
93+
@Override
94+
public ValidationType getType() {
95+
return ValidationType.CANVAS_TEXT_ADDITION;
96+
}
97+
}

pdfa/src/test/java/com/itextpdf/pdfa/PdfAFontTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ This file is part of the iText (R) project.
2727
import com.itextpdf.io.font.FontEncoding;
2828
import com.itextpdf.io.font.FontProgramFactory;
2929
import com.itextpdf.io.font.PdfEncodings;
30+
import com.itextpdf.io.logs.IoLogMessageConstant;
3031
import com.itextpdf.io.util.StreamUtil;
3132
import com.itextpdf.kernel.colors.ColorConstants;
3233
import com.itextpdf.kernel.font.PdfFont;
@@ -56,13 +57,16 @@ This file is part of the iText (R) project.
5657
import com.itextpdf.pdfa.exceptions.PdfaExceptionMessageConstant;
5758
import com.itextpdf.test.ExtendedITextTest;
5859
import com.itextpdf.test.TestUtil;
60+
import com.itextpdf.test.annotations.LogMessage;
61+
import com.itextpdf.test.annotations.LogMessages;
5962
import com.itextpdf.test.pdfa.VeraPdfValidator; // Android-Conversion-Skip-Line (TODO DEVSIX-7377 introduce pdf/ua validation on Android)
6063

6164
import java.io.ByteArrayOutputStream;
6265
import java.io.IOException;
6366
import java.io.InputStream;
6467
import org.junit.jupiter.api.Assertions;
6568
import org.junit.jupiter.api.BeforeAll;
69+
import org.junit.jupiter.api.Disabled;
6670
import org.junit.jupiter.api.Tag;
6771
import org.junit.jupiter.api.Test;
6872
import static org.junit.jupiter.api.Assertions.fail;
@@ -411,6 +415,8 @@ public void glyphLineWithUndefinedGlyphsTest() throws Exception {
411415
}
412416

413417
@Test
418+
@LogMessages(messages = @LogMessage(messageTemplate = IoLogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, count = 6))
419+
@Disabled("DEVSIX-9125")
414420
public void pdfArrayWithUndefinedGlyphsTest() throws Exception {
415421
String outPdf = DESTINATION_FOLDER + "pdfArrayWithUndefinedGlyphs.pdf";
416422

pdfua/src/main/java/com/itextpdf/pdfua/checkers/PdfUA2Checker.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ This file is part of the iText (R) project.
3939
import com.itextpdf.kernel.utils.checkers.PdfCheckersUtil;
4040
import com.itextpdf.kernel.validation.IValidationContext;
4141
import com.itextpdf.kernel.validation.context.CanvasBmcValidationContext;
42+
import com.itextpdf.kernel.validation.context.CanvasTextAdditionContext;
4243
import com.itextpdf.kernel.validation.context.CanvasWritingContentValidationContext;
4344
import com.itextpdf.kernel.validation.context.FontValidationContext;
4445
import com.itextpdf.kernel.validation.context.PdfAnnotationContext;
@@ -54,6 +55,7 @@ This file is part of the iText (R) project.
5455
import com.itextpdf.pdfua.checkers.utils.PdfUAValidationContext;
5556
import com.itextpdf.pdfua.checkers.utils.tables.TableCheckUtil;
5657
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2AnnotationChecker;
58+
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2CanvasTextChecker;
5759
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2DestinationsChecker;
5860
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2EmbeddedFilesChecker;
5961
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2FormChecker;
@@ -85,6 +87,7 @@ public class PdfUA2Checker extends PdfUAChecker {
8587

8688
private final PdfDocument pdfDocument;
8789
private final PdfUAValidationContext context;
90+
private final PdfUA2CanvasTextChecker textChecker = new PdfUA2CanvasTextChecker();
8891

8992
/**
9093
* Creates {@link PdfUA2Checker} instance with PDF document which will be validated against PDF/UA-2 standard.
@@ -105,8 +108,9 @@ public void validate(IValidationContext context) {
105108
checkCatalog(pdfDocContext.getPdfDocument().getCatalog());
106109
checkStructureTreeRoot(pdfDocContext.getPdfDocument().getStructTreeRoot());
107110
checkFonts(pdfDocContext.getDocumentFonts());
108-
new PdfUA2DestinationsChecker(pdfDocument).checkDestinations();
111+
new PdfUA2DestinationsChecker(pdfDocContext.getPdfDocument()).checkDestinations();
109112
PdfUA2XfaChecker.check(pdfDocContext.getPdfDocument());
113+
textChecker.checkCollectedContexts(pdfDocContext.getPdfDocument());
110114
break;
111115
case FONT:
112116
FontValidationContext fontContext = (FontValidationContext) context;
@@ -138,6 +142,10 @@ public void validate(IValidationContext context) {
138142
PdfAnnotationContext annotationContext = (PdfAnnotationContext) context;
139143
PdfUA2AnnotationChecker.checkAnnotation(annotationContext.getAnnotation(), this.context);
140144
break;
145+
case CANVAS_TEXT_ADDITION:
146+
CanvasTextAdditionContext canvasTextAdditionContext = (CanvasTextAdditionContext) context;
147+
textChecker.collectTextAdditionContext(canvasTextAdditionContext);
148+
break;
141149
}
142150
}
143151

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2025 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfua.checkers.utils.ua2;
24+
25+
import com.itextpdf.kernel.pdf.PdfDictionary;
26+
import com.itextpdf.kernel.pdf.PdfDocument;
27+
import com.itextpdf.kernel.pdf.PdfName;
28+
import com.itextpdf.kernel.pdf.PdfNumber;
29+
import com.itextpdf.kernel.pdf.PdfPage;
30+
import com.itextpdf.kernel.pdf.PdfStream;
31+
import com.itextpdf.kernel.pdf.PdfString;
32+
import com.itextpdf.kernel.pdf.tagging.IStructureNode;
33+
import com.itextpdf.kernel.pdf.tagging.PdfMcr;
34+
import com.itextpdf.kernel.pdf.tagging.PdfStructElem;
35+
import com.itextpdf.kernel.validation.context.CanvasTextAdditionContext;
36+
import com.itextpdf.pdfua.exceptions.PdfUAConformanceException;
37+
import com.itextpdf.pdfua.exceptions.PdfUAExceptionMessageConstants;
38+
39+
import java.util.ArrayList;
40+
import java.util.List;
41+
42+
/**
43+
* Utility class which performs UA-2 checks related to Replacements and Alternatives.
44+
*/
45+
public class PdfUA2CanvasTextChecker {
46+
47+
private final List<CanvasTextAdditionContext> textWithPua = new ArrayList<>();
48+
49+
/**
50+
* Creates {@link PdfUA2CanvasTextChecker} instance.
51+
*/
52+
public PdfUA2CanvasTextChecker() {
53+
// Empty constructor.
54+
}
55+
56+
/**
57+
* Collects all text strings, which contain PUA Unicode values.
58+
*
59+
* @param context {@link CanvasTextAdditionContext} which contains all the data needed for validation
60+
*/
61+
public void collectTextAdditionContext(CanvasTextAdditionContext context) {
62+
String text = context.getText();
63+
PdfDictionary attributes = context.getAttributes();
64+
PdfString alt = null;
65+
PdfString actualText = null;
66+
if (attributes != null) {
67+
alt = attributes.getAsString(PdfName.Alt);
68+
actualText = attributes.getAsString(PdfName.ActualText);
69+
}
70+
if (PdfUA2StringChecker.stringContainsPua(text)) {
71+
if (alt == null && actualText == null) {
72+
textWithPua.add(context);
73+
}
74+
}
75+
}
76+
77+
/**
78+
* Checks previously collected data according to Replacements and Alternatives UA-2 rules.
79+
*
80+
* @param document {@link PdfDocument} to be checked
81+
*/
82+
public void checkCollectedContexts(PdfDocument document) {
83+
for (CanvasTextAdditionContext context : textWithPua) {
84+
if (context.getMcId() == null) {
85+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.PUA_CONTENT_WITHOUT_ALT);
86+
}
87+
PdfMcr mcr = findMcrByMcId(document, context.getMcId(), context.getContentStream());
88+
if (mcr == null) {
89+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.PUA_CONTENT_WITHOUT_ALT);
90+
}
91+
IStructureNode structureNode = mcr.getParent();
92+
if (!(structureNode instanceof PdfStructElem)) {
93+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.PUA_CONTENT_WITHOUT_ALT);
94+
}
95+
PdfStructElem structElem = (PdfStructElem) structureNode;
96+
PdfString alt = structElem.getAlt();
97+
PdfString actualText = structElem.getActualText();
98+
if (alt == null && actualText == null) {
99+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.PUA_CONTENT_WITHOUT_ALT);
100+
}
101+
}
102+
}
103+
104+
private static PdfMcr findMcrByMcId(PdfDocument document, PdfNumber mcId, PdfStream contentStream) {
105+
for (int i = 1; i <= document.getNumberOfPages(); ++i) {
106+
PdfPage page = document.getPage(i);
107+
for (int j = 0; j < page.getContentStreamCount(); ++j) {
108+
PdfStream pageStream = page.getContentStream(j);
109+
if (pageStream.getIndirectReference().equals(contentStream.getIndirectReference())) {
110+
PdfMcr mcr = document.getStructTreeRoot().findMcrByMcid(page.getPdfObject(), mcId.intValue());
111+
if (mcr != null) {
112+
return mcr;
113+
}
114+
}
115+
}
116+
}
117+
return null;
118+
}
119+
}

pdfua/src/main/java/com/itextpdf/pdfua/checkers/utils/ua2/PdfUA2StringChecker.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,24 @@ public static void checkPdfString(PdfString string) {
4646
if (PdfEncodings.PDF_DOC_ENCODING.equals(string.getEncoding()) ||
4747
PdfEncodings.UTF8.equals(string.getEncoding()) ||
4848
PdfEncodings.UNICODE_BIG.equals(string.getEncoding())) {
49-
for (int i = 0; i < string.getValue().length(); ++i) {
50-
int code = string.getValue().codePointAt(i);
49+
if (stringContainsPua(string.getValue())) {
50+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA);
51+
}
52+
}
53+
}
54+
55+
static boolean stringContainsPua(String string) {
56+
if (string != null) {
57+
for (int i = 0; i < string.length(); ++i) {
58+
int code = string.codePointAt(i);
5159
boolean isPrivateArea = code >= 0xE000 && code <= 0xF8FF;
5260
boolean isSupplementaryPrivateAreaA = code >= 0xF0000 && code <= 0xFFFFD;
5361
boolean isSupplementaryPrivateAreaB = code >= 0x100000 && code <= 0x10FFFD;
5462
if (isPrivateArea || isSupplementaryPrivateAreaA || isSupplementaryPrivateAreaB) {
55-
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA);
63+
return true;
5664
}
5765
}
5866
}
67+
return false;
5968
}
6069
}

0 commit comments

Comments
 (0)