Skip to content

Commit 79262b6

Browse files
committed
support reading HSSFWorkbook with password passed as param
1 parent df5604a commit 79262b6

File tree

5 files changed

+173
-37
lines changed

5 files changed

+173
-37
lines changed

poi/src/main/java/org/apache/poi/hssf/record/RecordFactory.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,24 +172,37 @@ public static short[] getAllKnownRecordSIDs() {
172172
* Create a list of records from an input stream
173173
*
174174
* @param in the InputStream from which the records will be obtained
175-
*
176175
* @return a list of Records created from the InputStream
177-
*
178176
* @throws org.apache.poi.util.RecordFormatException on error processing the InputStream
179177
*/
180178
public static List<org.apache.poi.hssf.record.Record> createRecords(InputStream in) throws RecordFormatException {
179+
return createRecords(in, null);
180+
}
181+
182+
/**
183+
* Create a list of records from an input stream
184+
*
185+
* @param in the InputStream from which the records will be obtained
186+
* @param password in char array format (can be null)
187+
* @return a list of Records created from the InputStream
188+
* @throws org.apache.poi.util.RecordFormatException on error processing the InputStream
189+
* @since 6.0.0
190+
*/
191+
public static List<org.apache.poi.hssf.record.Record> createRecords(
192+
InputStream in, char[] password) throws RecordFormatException {
181193

182194
List<org.apache.poi.hssf.record.Record> records = new ArrayList<>(NUM_RECORDS);
183195

184-
RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true);
196+
RecordFactoryInputStream recStream = new RecordFactoryInputStream(in, true, password);
185197

186198
Record record;
187-
while ((record = recStream.nextRecord())!=null) {
199+
while ((record = recStream.nextRecord()) != null) {
188200
records.add(record);
189201

190202
IOUtils.safelyAllocateCheck(records.size(), MAX_NUMBER_OF_RECORDS);
191203
}
192204

193205
return records;
194206
}
207+
195208
}

poi/src/main/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -102,19 +102,38 @@ public StreamEncryptionInfo(RecordInputStream rs, List<org.apache.poi.hssf.recor
102102
_lastRecord = rec;
103103
}
104104

105+
/**
106+
* This method reads the password from {@link Biff8EncryptionKey}, falling back to the default password if none is set.
107+
* Since 6.0.0, we have overloaded methods where you can pass the password as a param instead.
108+
*/
105109
@SuppressWarnings({"squid:S2068"})
106110
public RecordInputStream createDecryptingStream(InputStream original) {
107-
String userPassword = Biff8EncryptionKey.getCurrentUserPassword();
108-
if (userPassword == null) {
109-
userPassword = Decryptor.DEFAULT_PASSWORD;
110-
}
111+
return createDecryptingStream(original, (String) null);
112+
}
111113

114+
/**
115+
* @since 6.0.0
116+
*/
117+
public RecordInputStream createDecryptingStream(InputStream original, char[] password) {
118+
return createDecryptingStream(original, new String(password));
119+
}
120+
121+
/**
122+
* @since 6.0.0
123+
*/
124+
public RecordInputStream createDecryptingStream(InputStream original, String password) {
125+
if (password == null) {
126+
password = Biff8EncryptionKey.getCurrentUserPassword();
127+
if (password == null) {
128+
password = Decryptor.DEFAULT_PASSWORD;
129+
}
130+
}
112131
EncryptionInfo info = _filePassRec.getEncryptionInfo();
113132
try {
114-
if (!info.getDecryptor().verifyPassword(userPassword)) {
133+
if (!info.getDecryptor().verifyPassword(password)) {
115134
throw new EncryptedDocumentException(
116-
(Decryptor.DEFAULT_PASSWORD.equals(userPassword) ? "Default" : "Supplied")
117-
+ " password is invalid for salt/verifier/verifierHash");
135+
(Decryptor.DEFAULT_PASSWORD.equals(password) ? "Default" : "Supplied")
136+
+ " password is invalid for salt/verifier/verifierHash");
118137
}
119138
} catch (GeneralSecurityException e) {
120139
throw new EncryptedDocumentException(e);
@@ -176,17 +195,29 @@ public boolean hasBOFRecord() {
176195

177196
/**
178197
* @param in the InputStream to read from
179-
*
180198
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
181199
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
182200
* processing).
183201
*/
184202
public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords) {
203+
this(in, shouldIncludeContinueRecords, null);
204+
}
205+
206+
/**
207+
* @param in the InputStream to read from
208+
* @param shouldIncludeContinueRecords caller can pass <code>false</code> if loose
209+
* {@link ContinueRecord}s should be skipped (this is sometimes useful in event based
210+
* processing).
211+
* @param password password in char array format (can be null)
212+
* @since 6.0.0
213+
*/
214+
public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRecords,
215+
char[] password) {
185216
RecordInputStream rs = new RecordInputStream(in);
186217
List<org.apache.poi.hssf.record.Record> records = new ArrayList<>();
187218
StreamEncryptionInfo sei = new StreamEncryptionInfo(rs, records);
188219
if (sei.hasEncryption()) {
189-
rs = sei.createDecryptingStream(in);
220+
rs = sei.createDecryptingStream(in, password);
190221
} else {
191222
// typical case - non-encrypted stream
192223
}
@@ -201,22 +232,22 @@ public RecordFactoryInputStream(InputStream in, boolean shouldIncludeContinueRec
201232
_lastRecord = sei.getLastRecord();
202233

203234
/*
204-
* How to recognise end of stream?
205-
* In the best case, the underlying input stream (in) ends just after the last EOF record
206-
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
207-
* reliably use zeros for padding and if this were always the case, this code could just
208-
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
209-
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
210-
*
211-
* So to properly detect the workbook end of stream, this code has to identify the last
212-
* EOF record. This is not so easy because the worbook bof+eof pair do not bracket the
213-
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
214-
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
215-
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
216-
* streams can be nested within worksheet streams and it's not clear in these cases what
217-
* record might follow any EOF record. So we also need to keep track of the bof/eof
218-
* nesting level.
219-
*/
235+
* How to recognise end of stream?
236+
* In the best case, the underlying input stream (in) ends just after the last EOF record
237+
* Usually however, the stream is padded with an arbitrary byte count. Excel and most apps
238+
* reliably use zeros for padding and if this were always the case, this code could just
239+
* skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with
240+
* non-zero padding that is read OK by Excel (Excel also fixes the padding).
241+
*
242+
* So to properly detect the workbook end of stream, this code has to identify the last
243+
* EOF record. This is not so easy because the workbook bof+eof pair do not bracket the
244+
* whole stream. The worksheets follow the workbook, but it is not easy to tell how many
245+
* sheet sub-streams should be present. Hence we are looking for an EOF record that is not
246+
* immediately followed by a BOF record. One extra complication is that bof+eof sub-
247+
* streams can be nested within worksheet streams and it's not clear in these cases what
248+
* record might follow any EOF record. So we also need to keep track of the bof/eof
249+
* nesting level.
250+
*/
220251
_bofDepth = sei.hasBOFRecord() ? 1 : 0;
221252
_lastRecordWasEOFLevelZero = false;
222253
}

poi/src/main/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ Licensed to the Apache Software Foundation (ASF) under one or more
113113
import org.apache.poi.util.LittleEndian;
114114
import org.apache.poi.util.LittleEndianByteArrayInputStream;
115115
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
116-
import org.apache.poi.util.Removal;
117116

118117
/**
119118
* High level representation of a workbook. This is the first object most users
@@ -205,6 +204,12 @@ public final class HSSFWorkbook extends POIDocument implements Workbook {
205204
*/
206205
private final UDFFinder _udfFinder = new IndexedUDFFinder(AggregatingUDFFinder.DEFAULT);
207206

207+
/**
208+
* The password needed to decrypt this workbook.
209+
* @since 6.0.0
210+
*/
211+
private char[] passwordChars;
212+
208213
public static HSSFWorkbook create(InternalWorkbook book) {
209214
return new HSSFWorkbook(book);
210215
}
@@ -325,7 +330,7 @@ public static String getWorkbookDirEntryName(DirectoryNode directory) {
325330
* low level models. If you're reading in a workbook...start here.
326331
*
327332
* @param directory the POI filesystem directory to process from
328-
* @param fs the POI filesystem that contains the Workbook stream.
333+
* @param fs the POI filesystem that contains the Workbook stream (ignored).
329334
* @param preserveNodes whether to preserve other nodes, such as
330335
* macros. This takes more memory, so only say yes if you
331336
* need to. If set, will store all of the POIFSFileSystem
@@ -334,6 +339,7 @@ public static String getWorkbookDirEntryName(DirectoryNode directory) {
334339
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
335340
* input format
336341
* @see POIFSFileSystem
342+
* @deprecated the <code>fs</code> param is ignored - use the constructor that omits it
337343
*/
338344
public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes)
339345
throws IOException {
@@ -357,10 +363,33 @@ public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserv
357363
*/
358364
public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes)
359365
throws IOException {
366+
this(directory, preserveNodes, null);
367+
}
368+
369+
/**
370+
* given a POI POIFSFileSystem object, and a specific directory
371+
* within it, read in its Workbook and populate the high and
372+
* low level models. If you're reading in a workbook...start here.
373+
*
374+
* @param directory the POI filesystem directory to process from
375+
* @param preserveNodes whether to preserve other nodes, such as
376+
* macros. This takes more memory, so only say yes if you
377+
* need to. If set, will store all of the POIFSFileSystem
378+
* in memory
379+
* @param password in char array format (can be null)
380+
* @throws IOException if the stream cannot be read
381+
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
382+
* input format
383+
* @see POIFSFileSystem
384+
* @since 6.0.0
385+
*/
386+
public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes, char[] password)
387+
throws IOException {
360388
super(directory);
361389
String workbookName = getWorkbookDirEntryName(directory);
362390

363391
this.preserveNodes = preserveNodes;
392+
this.passwordChars = password;
364393

365394
// If we're not preserving nodes, don't track the
366395
// POIFS any more
@@ -375,7 +404,7 @@ public HSSFWorkbook(DirectoryNode directory, boolean preserveNodes)
375404
// it happens to be spelled.
376405
InputStream stream = directory.createDocumentInputStream(workbookName);
377406

378-
List<org.apache.poi.hssf.record.Record> records = RecordFactory.createRecords(stream);
407+
List<org.apache.poi.hssf.record.Record> records = RecordFactory.createRecords(stream, password);
379408

380409
workbook = InternalWorkbook.createWorkbook(records);
381410
setPropertiesFromWorkbook(workbook);
@@ -418,6 +447,24 @@ public HSSFWorkbook(InputStream s) throws IOException {
418447
this(s, true);
419448
}
420449

450+
/**
451+
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
452+
* POI filesystem around your {@link InputStream}, including all nodes.
453+
* <p>This calls {@link #HSSFWorkbook(InputStream, boolean, char[])} with
454+
* preserve nodes set to true.
455+
*
456+
* @throws IOException if the stream cannot be read
457+
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
458+
* input format
459+
* @see #HSSFWorkbook(InputStream, boolean)
460+
* @see #HSSFWorkbook(POIFSFileSystem)
461+
* @see POIFSFileSystem
462+
* @since 6.0.0
463+
*/
464+
public HSSFWorkbook(InputStream s, char[] password) throws IOException {
465+
this(s, true, password);
466+
}
467+
421468
/**
422469
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
423470
* POI filesystem around your {@link InputStream}.
@@ -439,9 +486,30 @@ public HSSFWorkbook(InputStream s, boolean preserveNodes)
439486
}
440487

441488
/**
442-
* used internally to set the workbook properties.
489+
* Companion to HSSFWorkbook(POIFSFileSystem), this constructs the
490+
* POI filesystem around your {@link InputStream}.
491+
*
492+
* @param s the InputStream containing the POI filesystem that holds the Workbook stream.
493+
* @param preserveNodes whether to preserve other nodes, such as
494+
* macros. This takes more memory, so only say yes if you
495+
* need to.
496+
* @param password in char array format (can be null)
497+
* @throws IOException if the stream cannot be read
498+
* @throws IllegalStateException a number of runtime exceptions can be thrown, especially if there are problems with the
499+
* input format
500+
* @see POIFSFileSystem
501+
* @see #HSSFWorkbook(POIFSFileSystem)
502+
* @since 6.0.0
443503
*/
504+
@SuppressWarnings("resource") // POIFSFileSystem always closes the stream
505+
public HSSFWorkbook(InputStream s, boolean preserveNodes, char[] password)
506+
throws IOException {
507+
this(new POIFSFileSystem(s).getRoot(), preserveNodes, password);
508+
}
444509

510+
/**
511+
* used internally to set the workbook properties.
512+
*/
445513
private void setPropertiesFromWorkbook(InternalWorkbook book) {
446514
this.workbook = book;
447515

@@ -1480,7 +1548,6 @@ public int serialize(int offset, byte[] data) {
14801548
}
14811549
}
14821550

1483-
14841551
/**
14851552
* Method getBytes - get the bytes of just the HSSF portions of the XLS file.
14861553
* Use this to construct a POI POIFSFileSystem yourself.
@@ -1494,7 +1561,14 @@ public byte[] getBytes() {
14941561
HSSFSheet[] sheets = getSheets();
14951562
int nSheets = sheets.length;
14961563

1497-
updateEncryptionInfo();
1564+
String pwdString;
1565+
if (passwordChars != null) {
1566+
pwdString = new String(passwordChars);
1567+
} else {
1568+
// from POI 6.0.0, using Biff8EncryptionKey is discouraged
1569+
pwdString = Biff8EncryptionKey.getCurrentUserPassword();
1570+
}
1571+
updateEncryptionInfo(pwdString);
14981572

14991573
// before getting the workbook size we must tell the sheets that
15001574
// serialization is about to occur.
@@ -2327,12 +2401,11 @@ public EncryptionInfo getEncryptionInfo() {
23272401
}
23282402

23292403

2330-
private void updateEncryptionInfo() {
2404+
private void updateEncryptionInfo(String password) {
23312405
// make sure, that we've read all the streams ...
23322406
readProperties();
23332407
FilePassRecord fpr = (FilePassRecord) workbook.findFirstRecordBySid(FilePassRecord.sid);
23342408

2335-
String password = Biff8EncryptionKey.getCurrentUserPassword();
23362409
WorkbookRecordList wrl = workbook.getWorkbookRecordList();
23372410
if (password == null) {
23382411
if (fpr != null) {

poi/src/test/java/org/apache/poi/hssf/HSSFTestDataSamples.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ public static HSSFWorkbook openSampleWorkbook(String sampleFileName) {
4949
throw new RuntimeException(e);
5050
}
5151
}
52+
53+
public static HSSFWorkbook openSampleWorkbook(String sampleFileName, char[] password) {
54+
try (InputStream stream = _inst.openResourceAsStream(sampleFileName)){
55+
return new HSSFWorkbook(stream, password);
56+
} catch (IOException e) {
57+
throw new RuntimeException(e);
58+
}
59+
}
60+
5261
/**
5362
* Writes a spreadsheet to a {@code ByteArrayOutputStream} and reads it back
5463
* from a {@code ByteArrayInputStream}.<p>

poi/src/test/java/org/apache/poi/hssf/usermodel/TestHSSFWorkbook.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,16 @@ void testWorkbookCloseClosesInputStream() throws Exception {
12491249
}
12501250
}
12511251

1252+
@Test
1253+
void testPassword() throws Exception {
1254+
try (HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook(
1255+
"xor-encryption-abc.xls", "abc".toCharArray())) {
1256+
HSSFSheet hssfSheet = wb.getSheetAt(0);
1257+
double a1Value = hssfSheet.getRow(0).getCell(0).getNumericCellValue();
1258+
assertEquals(1.0, a1Value);
1259+
}
1260+
}
1261+
12521262
private static class WrappedStream extends FilterInputStream {
12531263
private boolean closed;
12541264

0 commit comments

Comments
 (0)