Skip to content

Commit 06efab5

Browse files
authored
fix(sanitize): fix JavaScript handling, embedded file sanitization (#4652)
# Description of Changes

### Fixes

- Added document-level JavaScript removal: now removes the OpenAction and the catalog additional actions (WC, WS, DS, WP, DP) that execute on document open, save, print, and close events.
- Added page-level JavaScript removal: removes page open/close actions (O, C) that were previously missed.
- Added annotation additional actions removal: removes all 10 annotation event handlers (Bl, D, E, Fo, PC, PI, PO, PV, U, X) for mouse/focus events.
- Fixed embedded file removal: corrected the implementation to use `catalog.getNames().setEmbeddedFiles(null)` instead of incorrectly targeting page resources.

### Verification

Before (after embedded file "removal"):

<img width="706" height="671" alt="image" src="https://github.com/user-attachments/assets/7d10e1ba-78bc-4094-b28a-0eae3613db3c" />

After:

<img width="706" height="671" alt="image" src="https://github.com/user-attachments/assets/b278c2a6-1605-483f-b39c-1c3aa047acc2" />

<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [x] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md) (if applicable)
- [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md) (if applicable)
- [x] I have performed a self-review of my own code
- [x] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed)
- [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [x] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing) for more details.

Signed-off-by: Balázs Szücs <[email protected]>
1 parent 085b879 commit 06efab5

File tree

1 file changed

+77
-33
lines changed

1 file changed

+77
-33
lines changed

app/core/src/main/java/stirling/software/SPDF/controller/api/security/SanitizeController.java

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,25 @@
22

33
import java.io.ByteArrayOutputStream;
44
import java.io.IOException;
5+
import java.util.List;
56

67
import org.apache.pdfbox.cos.COSDictionary;
78
import org.apache.pdfbox.cos.COSName;
89
import org.apache.pdfbox.pdmodel.PDDocument;
910
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
1011
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
12+
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
1113
import org.apache.pdfbox.pdmodel.PDPage;
12-
import org.apache.pdfbox.pdmodel.PDPageTree;
13-
import org.apache.pdfbox.pdmodel.PDResources;
1414
import org.apache.pdfbox.pdmodel.common.PDMetadata;
1515
import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
1616
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
1717
import org.apache.pdfbox.pdmodel.interactive.action.PDActionLaunch;
1818
import org.apache.pdfbox.pdmodel.interactive.action.PDActionURI;
19+
import org.apache.pdfbox.pdmodel.interactive.action.PDDocumentCatalogAdditionalActions;
1920
import org.apache.pdfbox.pdmodel.interactive.action.PDFormFieldAdditionalActions;
21+
import org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions;
2022
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
23+
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
2124
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
2225
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
2326
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
@@ -34,6 +37,7 @@
3437
import io.swagger.v3.oas.annotations.tags.Tag;
3538

3639
import lombok.RequiredArgsConstructor;
40+
import lombok.extern.slf4j.Slf4j;
3741

3842
import stirling.software.SPDF.model.api.security.SanitizePdfRequest;
3943
import stirling.software.common.service.CustomPDFDocumentFactory;
@@ -43,6 +47,7 @@
4347
@RestController
4448
@RequestMapping("/api/v1/security")
4549
@Tag(name = "Security", description = "Security APIs")
50+
@Slf4j
4651
@RequiredArgsConstructor
4752
public class SanitizeController {
4853

@@ -99,7 +104,7 @@ public ResponseEntity<byte[]> sanitizePDF(@ModelAttribute SanitizePdfRequest req
99104
GeneralUtils.generateFilename(inputFile.getOriginalFilename(), "_sanitized.pdf"));
100105
}
101106

102-
private void sanitizeJavaScript(PDDocument document) throws IOException {
107+
private static void sanitizeJavaScript(PDDocument document) throws IOException {
103108
// Get the root dictionary (catalog) of the PDF
104109
PDDocumentCatalog catalog = document.getDocumentCatalog();
105110

@@ -118,7 +123,61 @@ private void sanitizeJavaScript(PDDocument document) throws IOException {
118123
}
119124
}
120125

126+
if (catalog.getOpenAction() instanceof PDActionJavaScript) {
127+
catalog.setOpenAction(null);
128+
}
129+
130+
PDDocumentCatalogAdditionalActions catalogActions = catalog.getActions();
131+
if (catalogActions != null) {
132+
if (catalogActions.getWC() instanceof PDActionJavaScript) {
133+
catalogActions.setWC(null);
134+
}
135+
if (catalogActions.getWS() instanceof PDActionJavaScript) {
136+
catalogActions.setWS(null);
137+
}
138+
if (catalogActions.getDS() instanceof PDActionJavaScript) {
139+
catalogActions.setDS(null);
140+
}
141+
if (catalogActions.getWP() instanceof PDActionJavaScript) {
142+
catalogActions.setWP(null);
143+
}
144+
if (catalogActions.getDP() instanceof PDActionJavaScript) {
145+
catalogActions.setDP(null);
146+
}
147+
}
148+
149+
PDAcroForm acroForm = catalog.getAcroForm();
150+
if (acroForm != null) {
151+
for (PDField field : acroForm.getFields()) {
152+
PDFormFieldAdditionalActions actions = field.getActions();
153+
if (actions != null) {
154+
if (actions.getC() instanceof PDActionJavaScript) {
155+
actions.setC(null);
156+
}
157+
if (actions.getF() instanceof PDActionJavaScript) {
158+
actions.setF(null);
159+
}
160+
if (actions.getK() instanceof PDActionJavaScript) {
161+
actions.setK(null);
162+
}
163+
if (actions.getV() instanceof PDActionJavaScript) {
164+
actions.setV(null);
165+
}
166+
}
167+
}
168+
}
169+
121170
for (PDPage page : document.getPages()) {
171+
PDPageAdditionalActions pageActions = page.getActions();
172+
if (pageActions != null) {
173+
if (pageActions.getO() instanceof PDActionJavaScript) {
174+
pageActions.setO(null);
175+
}
176+
if (pageActions.getC() instanceof PDActionJavaScript) {
177+
pageActions.setC(null);
178+
}
179+
}
180+
122181
for (PDAnnotation annotation : page.getAnnotations()) {
123182
if (annotation instanceof PDAnnotationWidget widget) {
124183
PDAction action = widget.getAction();
@@ -127,41 +186,26 @@ private void sanitizeJavaScript(PDDocument document) throws IOException {
127186
}
128187
}
129188
}
130-
PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
131-
if (acroForm != null) {
132-
for (PDField field : acroForm.getFields()) {
133-
PDFormFieldAdditionalActions actions = field.getActions();
134-
if (actions != null) {
135-
if (actions.getC() instanceof PDActionJavaScript) {
136-
actions.setC(null);
137-
}
138-
if (actions.getF() instanceof PDActionJavaScript) {
139-
actions.setF(null);
140-
}
141-
if (actions.getK() instanceof PDActionJavaScript) {
142-
actions.setK(null);
143-
}
144-
if (actions.getV() instanceof PDActionJavaScript) {
145-
actions.setV(null);
146-
}
147-
}
148-
}
149-
}
150189
}
151190
}
152191

153-
private void sanitizeEmbeddedFiles(PDDocument document) {
154-
PDPageTree allPages = document.getPages();
192+
private static void sanitizeEmbeddedFiles(PDDocument document) throws IOException {
193+
PDDocumentCatalog catalog = document.getDocumentCatalog();
194+
PDDocumentNameDictionary names = catalog.getNames();
195+
if (names != null) {
196+
names.setEmbeddedFiles(null);
197+
}
155198

156-
for (PDPage page : allPages) {
157-
PDResources res = page.getResources();
158-
if (res != null && res.getCOSObject() != null) {
159-
res.getCOSObject().removeItem(COSName.getPDFName("EmbeddedFiles"));
199+
for (PDPage page : document.getPages()) {
200+
List<PDAnnotation> annotations = page.getAnnotations();
201+
if (annotations != null && !annotations.isEmpty()) {
202+
annotations.removeIf(
203+
annotation -> annotation instanceof PDAnnotationFileAttachment);
160204
}
161205
}
162206
}
163207

164-
private void sanitizeXMPMetadata(PDDocument document) {
208+
private static void sanitizeXMPMetadata(PDDocument document) {
165209
if (document.getDocumentCatalog() != null) {
166210
PDMetadata metadata = document.getDocumentCatalog().getMetadata();
167211
if (metadata != null) {
@@ -170,15 +214,15 @@ private void sanitizeXMPMetadata(PDDocument document) {
170214
}
171215
}
172216

173-
private void sanitizeDocumentInfoMetadata(PDDocument document) {
217+
private static void sanitizeDocumentInfoMetadata(PDDocument document) {
174218
PDDocumentInformation docInfo = document.getDocumentInformation();
175219
if (docInfo != null) {
176220
PDDocumentInformation newInfo = new PDDocumentInformation();
177221
document.setDocumentInformation(newInfo);
178222
}
179223
}
180224

181-
private void sanitizeLinks(PDDocument document) throws IOException {
225+
private static void sanitizeLinks(PDDocument document) throws IOException {
182226
for (PDPage page : document.getPages()) {
183227
for (PDAnnotation annotation : page.getAnnotations()) {
184228
if (annotation instanceof PDAnnotationLink linkAnnotation) {
@@ -191,7 +235,7 @@ private void sanitizeLinks(PDDocument document) throws IOException {
191235
}
192236
}
193237

194-
private void sanitizeFonts(PDDocument document) {
238+
private static void sanitizeFonts(PDDocument document) {
195239
for (PDPage page : document.getPages()) {
196240
if (page != null
197241
&& page.getResources() != null

0 commit comments

Comments
 (0)