Skip to content

Commit ccb0e7c

Browse files
committed
Add support for PPMd compression.
1 parent c453db0 commit ccb0e7c

File tree

7 files changed

+224
-0
lines changed

7 files changed

+224
-0
lines changed

py7zlib.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ def bytes(s, encoding):
149149
COMPRESSION_METHOD_BCJ_ARMT = unhexlify('03030701') # '\x03\x03\x07\x01'
150150
COMPRESSION_METHOD_BCJ_SPARC = unhexlify('03030805') # '\x03\x03\x08\x05'
151151
COMPRESSION_METHOD_BCJ2 = unhexlify('0303011B') # '\x03\x03\x01\x1B'
152+
COMPRESSION_METHOD_PPMD = unhexlify('030401') # '\x03\x04\x01'
152153

153154
FILE_ATTRIBUTE_DIRECTORY = 0x10
154155
FILE_ATTRIBUTE_READONLY = 0x01
@@ -619,6 +620,7 @@ def __init__(self, info, start, src_start, folder, archive, maxsize=None):
619620
COMPRESSION_METHOD_BCJ_ARMT: '_read_bcj_armt',
620621
COMPRESSION_METHOD_BCJ_SPARC: '_read_bcj_sparc',
621622
COMPRESSION_METHOD_BCJ2: '_read_bcj2',
623+
COMPRESSION_METHOD_PPMD: '_read_ppmd',
622624
}
623625

624626
def _is_encrypted(self):
@@ -861,6 +863,15 @@ def _read_bcj_sparc(self, coder, input, level, num_coders):
861863
data = pylzma.bcj_sparc_convert(input)
862864
return data[self._start:self._start+size]
863865

866+
def _read_ppmd(self, coder, input, level, num_coders):
867+
size = self._uncompressed[level]
868+
if not input:
869+
self._file.seek(self._src_start)
870+
input = self._file.read(self.compressed)
871+
total_out = sum(self._unpacksizes)
872+
data = pylzma.ppmd_decompress(input, coder['properties'], total_out)
873+
return data[self._start:self._start+size]
874+
864875
def checkcrc(self):
865876
if self.digest is None:
866877
return True
@@ -998,11 +1009,13 @@ def __init__(self, file, password=None):
9981009
for coder in folder.coders:
9991010
numinstreams = max(numinstreams, coder.get('numinstreams', 1))
10001011
info['_packsizes'] = packinfo.packsizes[instreamindex:instreamindex+numinstreams]
1012+
info['_unpacksizes'] = unpacksizes
10011013
streamidx += 1
10021014
else:
10031015
info['compressed'] = 0
10041016
info['_uncompressed'] = [0]
10051017
info['_packsizes'] = [0]
1018+
info['_unpacksizes'] = [0]
10061019
folder = None
10071020
maxsize = 0
10081021
numinstreams = 1

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ def build_extension(self, ext):
164164
'src/sdk/C/Lzma2Dec.c',
165165
'src/sdk/C/Lzma2Enc.c',
166166
'src/sdk/C/Sha256.c',
167+
'src/sdk/C/Ppmd7.c',
168+
'src/sdk/C/Ppmd7Dec.c',
167169
)
168170
if ENABLE_COMPATIBILITY:
169171
c_files += (

src/pylzma/pylzma.c

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "../sdk/C/Bra.h"
3232
#include "../sdk/C/Bcj2.h"
3333
#include "../sdk/C/Delta.h"
34+
#include "../sdk/C/Ppmd7.h"
3435

3536
#include "pylzma.h"
3637
#include "pylzma_compress.h"
@@ -45,6 +46,7 @@
4546
#include "pylzma_decompress_compat.h"
4647
#include "pylzma_decompressobj_compat.h"
4748
#endif
49+
#include "pylzma_streams.h"
4850

4951
#if defined(WITH_THREAD) && !defined(PYLZMA_USE_GILSTATE)
5052
PyInterpreterState* _pylzma_interpreterState = NULL;
@@ -350,6 +352,137 @@ pylzma_delta_encode(PyObject *self, PyObject *args)
350352
return result;
351353
}
352354

355+
/* Docstring attached to the ppmd_decompress entry of the module method table. */
const char doc_ppmd_decompress[] =
    "ppmd_decompress(data, properties, outsize)"
    " -- Decompress PPMd stream.";
358+
359+
typedef struct
360+
{
361+
IByteIn vt;
362+
const Byte *cur;
363+
const Byte *end;
364+
const Byte *begin;
365+
UInt64 processed;
366+
BoolInt extra;
367+
SRes res;
368+
const ILookInStream *inStream;
369+
} CByteInToLook;
370+
371+
static Byte
372+
ReadByte(const IByteIn *pp) {
373+
CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
374+
if (p->cur != p->end) {
375+
return *p->cur++;
376+
}
377+
378+
if (p->res == SZ_OK) {
379+
size_t size = p->cur - p->begin;
380+
p->processed += size;
381+
p->res = ILookInStream_Skip(p->inStream, size);
382+
size = (1 << 25);
383+
p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
384+
p->cur = p->begin;
385+
p->end = p->begin + size;
386+
if (size != 0) {
387+
return *p->cur++;;
388+
}
389+
}
390+
p->extra = True;
391+
return 0;
392+
}
393+
394+
/*
 * ppmd_decompress(data, properties, outsize) -> bytes
 *
 * Decode a PPMd (Ppmd7) stream held in `data`.
 *
 *   data        compressed input bytes
 *   properties  exactly 5 bytes: model order (1 byte) followed by the
 *               memory size read with GetUi32
 *   outsize     number of bytes to produce
 *
 * Returns a new bytes object of length `outsize` (an empty bytes object if
 * `outsize` is 0). Raises TypeError if the properties are malformed or out
 * of range, or if decoding fails / the input is not fully and cleanly
 * consumed; raises MemoryError if the model cannot be allocated.
 */
static PyObject *
pylzma_ppmd_decompress(PyObject *self, PyObject *args)
{
    char *data;
    PARSE_LENGTH_TYPE length;
    char *props;
    PARSE_LENGTH_TYPE propssize;
    unsigned int outsize;
    PyObject *result;
    Byte *tmp;
    unsigned order;
    UInt32 memSize;
    CPpmd7 ppmd;
    CPpmd7z_RangeDec rc;
    CByteInToLook s;
    SRes res = SZ_OK;
    CMemoryLookInStream stream;

    if (!PyArg_ParseTuple(args, "s#s#I", &data, &length, &props, &propssize, &outsize)) {
        return NULL;
    }

    if (propssize != 5) {
        /* PARSE_LENGTH_TYPE is not necessarily `long`; cast to match %ld. */
        PyErr_Format(PyExc_TypeError, "properties must be exactly 5 bytes, got %ld", (long) propssize);
        return NULL;
    }

    /* Interpret the order as an unsigned byte regardless of `char` signedness. */
    order = (Byte) props[0];
    memSize = GetUi32(props + 1);
    if (order < PPMD7_MIN_ORDER ||
        order > PPMD7_MAX_ORDER ||
        memSize < PPMD7_MIN_MEM_SIZE ||
        memSize > PPMD7_MAX_MEM_SIZE) {
        PyErr_SetString(PyExc_TypeError, "unsupported compression properties");
        return NULL;
    }

    if (!outsize) {
        return PyBytes_FromString("");
    }

    Ppmd7_Construct(&ppmd);
    if (!Ppmd7_Alloc(&ppmd, memSize, &allocator)) {
        return PyErr_NoMemory();
    }
    Ppmd7_Init(&ppmd, order);

    result = PyBytes_FromStringAndSize(NULL, outsize);
    if (!result) {
        /* Release the model allocated above; it was leaked on this path before. */
        Ppmd7_Free(&ppmd, &allocator);
        return NULL;
    }

    CreateMemoryLookInStream(&stream, (Byte*) data, length);
    tmp = (Byte *) PyBytes_AS_STRING(result);
    Py_BEGIN_ALLOW_THREADS
    Ppmd7z_RangeDec_CreateVTable(&rc);
    s.vt.Read = ReadByte;
    s.inStream = &stream.s;
    s.begin = s.end = s.cur = NULL;
    s.extra = False;
    s.res = SZ_OK;
    s.processed = 0;
    rc.Stream = &s.vt;
    if (!Ppmd7z_RangeDec_Init(&rc)) {
        res = SZ_ERROR_DATA;
    } else if (s.extra) {
        /* The range decoder header already needed more bytes than available. */
        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
    } else {
        SizeT i;
        for (i = 0; i < outsize; i++) {
            int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
            if (s.extra || sym < 0) {
                break;
            }
            tmp[i] = (Byte)sym;
        }
        if (i != outsize) {
            /* Stopped early: input exhausted or invalid symbol. */
            res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
        } else if (s.processed + (UInt64)(s.cur - s.begin) != (UInt64) length ||
                   !Ppmd7z_RangeDec_IsFinishedOK(&rc)) {
            /* All output produced, but the input was not consumed exactly
               or the range decoder did not report a clean finish. */
            res = SZ_ERROR_DATA;
        }
    }
    Py_END_ALLOW_THREADS
    Ppmd7_Free(&ppmd, &allocator);
    if (res != SZ_OK) {
        Py_DECREF(result);
        PyErr_SetString(PyExc_TypeError, "error during decompression");
        result = NULL;
    }
    return result;
}
485+
353486
PyMethodDef
354487
methods[] = {
355488
// exported functions
@@ -372,6 +505,8 @@ methods[] = {
372505
// Delta
373506
{"delta_decode", (PyCFunction)pylzma_delta_decode, METH_VARARGS, (char *)&doc_delta_decode},
374507
{"delta_encode", (PyCFunction)pylzma_delta_encode, METH_VARARGS, (char *)&doc_delta_encode},
508+
// PPMd
509+
{"ppmd_decompress", (PyCFunction)pylzma_ppmd_decompress, METH_VARARGS, (char *)&doc_ppmd_decompress},
375510
{NULL, NULL},
376511
};
377512

src/pylzma/pylzma_streams.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,63 @@ MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size)
177177
stream->size -= size;
178178
}
179179
}
180+
181+
static SRes
182+
MemoryLookInStream_Read(const ILookInStream *p, void *buf, size_t *size)
183+
{
184+
CMemoryLookInStream *self = (CMemoryLookInStream *) p;
185+
size_t toread = *size;
186+
if (toread > self->avail) {
187+
toread = self->avail;
188+
}
189+
memcpy(buf, self->data, toread);
190+
self->data += toread;
191+
self->avail -= toread;
192+
*size = toread;
193+
return SZ_OK;
194+
}
195+
196+
static SRes
197+
MemoryLookInStream_Look(const ILookInStream *p, const void **buf, size_t *size)
198+
{
199+
CMemoryLookInStream *self = (CMemoryLookInStream *) p;
200+
size_t toread = *size;
201+
if (toread > self->avail) {
202+
toread = self->avail;
203+
}
204+
*buf = self->data;
205+
*size = toread;
206+
return SZ_OK;
207+
}
208+
209+
static SRes
210+
MemoryLookInStream_Skip(const ILookInStream *p, size_t offset)
211+
{
212+
CMemoryLookInStream *self = (CMemoryLookInStream *) p;
213+
size_t toread = offset;
214+
if (toread > self->avail) {
215+
toread = self->avail;
216+
}
217+
self->data += toread;
218+
self->avail -= toread;
219+
return SZ_OK;
220+
}
221+
222+
static SRes
223+
MemoryLookInStream_Seek(const ILookInStream *p, Int64 *pos, ESzSeek origin)
224+
{
225+
CMemoryLookInStream *self = (CMemoryLookInStream *) p;
226+
printf("XXX\n");
227+
return SZ_ERROR_UNSUPPORTED;
228+
}
229+
230+
void
231+
CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size)
232+
{
233+
stream->s.Read = MemoryLookInStream_Read;
234+
stream->s.Look = MemoryLookInStream_Look;
235+
stream->s.Skip = MemoryLookInStream_Skip;
236+
stream->s.Seek = MemoryLookInStream_Seek;
237+
stream->data = data;
238+
stream->avail = size;
239+
}

src/pylzma/pylzma_streams.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,14 @@ typedef struct
6464
void CreateMemoryOutStream(CMemoryOutStream *stream);
6565
void MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size);
6666

67+
typedef struct
68+
{
69+
ILookInStream s;
70+
Byte *data;
71+
size_t size;
72+
size_t avail;
73+
} CMemoryLookInStream;
74+
75+
/* Set up `stream` to read `size` bytes from `data`; the buffer pointer is stored, not copied. */
void CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size);
76+
6777
#endif

tests/data/ppmd.7z

228 Bytes
Binary file not shown.

tests/test_7zfiles.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,10 @@ def test_lzma_bcj_sparc(self):
225225
# test loading of lzma compressed, filtered through bcj / SPARC
226226
self._test_archive('lzma_bcj_sparc.7z')
227227

228+
def test_ppmd(self):
229+
# test loading of PPMd compressed archive
230+
self._test_archive('ppmd.7z')
231+
228232
def test_regress_1(self):
229233
# prevent regression bug #1 reported by mail
230234
fp = self._open_file(os.path.join(ROOT, 'data', 'regress_1.7z'), 'rb')

0 commit comments

Comments
 (0)