mirror of
https://github.com/AsahiLinux/m1n1
synced 2024-11-21 14:23:01 +00:00
proxy: add XZ and GZ decompression functions and code
This embeds tinf and minlzma. Signed-off-by: Hector Martin <marcan@marcan.st>
This commit is contained in:
parent
afc5813635
commit
ee12d053a9
26 changed files with 3826 additions and 7 deletions
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) <YEAR>, <OWNER>
|
||||
Copyright (C) 2006 David Gibson, IBM Corporation.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
21
3rdparty_licenses/LICENSE.minlzma
Normal file
21
3rdparty_licenses/LICENSE.minlzma
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2020 Alex Ionescu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
22
3rdparty_licenses/LICENSE.tinf
Normal file
22
3rdparty_licenses/LICENSE.tinf
Normal file
|
@ -0,0 +1,22 @@
|
|||
The zlib License (Zlib)
|
||||
|
||||
Copyright (c) 2003-2019 Joergen Ibsen
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must
|
||||
not claim that you wrote the original software. If you use this
|
||||
software in a product, an acknowledgment in the product
|
||||
documentation would be appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must
|
||||
not be misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source
|
||||
distribution.
|
12
Makefile
12
Makefile
|
@ -3,15 +3,21 @@ ARCH := aarch64-linux-gnu-
|
|||
CFLAGS := -O2 -Wall -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \
|
||||
-Werror=implicit-function-declaration -Werror=implicit-int \
|
||||
-ffreestanding -fpic -ffunction-sections -fdata-sections \
|
||||
-fno-stack-protector -mgeneral-regs-only
|
||||
-fno-stack-protector -mgeneral-regs-only -mstrict-align
|
||||
|
||||
LDFLAGS := -T m1n1.ld -EL -maarch64elf --no-undefined -X -Bsymbolic \
|
||||
-z notext --no-apply-dynamic-relocs --orphan-handling=warn --strip-debug \
|
||||
-z nocopyreloc --gc-sections -pie
|
||||
|
||||
MINILZLIB_OBJECTS := $(patsubst %,minilzlib/%, \
|
||||
dictbuf.o inputbuf.o lzma2dec.o lzmadec.o rangedec.o xzstream.o)
|
||||
|
||||
TINF_OBJECTS := $(patsubst %,tinf/%, \
|
||||
adler32.o crc32.o tinfgzip.o tinflate.o tinfzlib.o)
|
||||
|
||||
OBJECTS := adt.o bootlogo_128.o bootlogo_256.o exception.o exception_asm.o fb.o \
|
||||
main.o memory.o proxy.o start.o startup.o string.o uart.o uartproxy.o utils.o \
|
||||
utils_asm.o vsprintf.o
|
||||
utils_asm.o vsprintf.o $(MINILZLIB_OBJECTS) $(TINF_OBJECTS)
|
||||
|
||||
BUILD_OBJS := $(patsubst %,build/%,$(OBJECTS))
|
||||
NAME := m1n1
|
||||
|
@ -41,11 +47,13 @@ format:
|
|||
build/%.o: src/%.S
|
||||
@echo " AS $@"
|
||||
@mkdir -p $(DEPDIR)
|
||||
@mkdir -p "$(basename $@)"
|
||||
@$(AS) -c $(CFLAGS) -Wp,-MMD,$(DEPDIR)/$(*F).d,-MQ,"$@",-MP -o $@ $<
|
||||
|
||||
build/%.o: src/%.c
|
||||
@echo " CC $@"
|
||||
@mkdir -p $(DEPDIR)
|
||||
@mkdir -p "$(basename $@)"
|
||||
@$(CC) -c $(CFLAGS) -Wp,-MMD,$(DEPDIR)/$(*F).d,-MQ,"$@",-MP -o $@ $<
|
||||
|
||||
build/$(NAME).elf: $(BUILD_OBJS) m1n1.ld
|
||||
|
|
13
README.md
13
README.md
|
@ -16,10 +16,19 @@ Portions of m1n1 are based on mini:
|
|||
* Copyright (C) 2008-2010 Sven Peter <sven@svenpeter.dev>
|
||||
* Copyright (C) 2008-2010 Andre Heider <a.heider@gmail.com>
|
||||
|
||||
m1n1 embeds libfdt, which is dual [BSD](LICENSE.BSD-2) and
|
||||
[GPL-2](LICENSE.GPL-2) licensed and copyright:
|
||||
m1n1 embeds libfdt, which is dual [BSD](3rdparty_licenses/LICENSE.BSD-2.libfdt) and
|
||||
[GPL-2](3rdparty_licenses/LICENSE.GPL-2) licensed and copyright:
|
||||
|
||||
* Copyright (C) 2006 David Gibson, IBM Corporation.
|
||||
|
||||
The ADT code in mini is also based on libfdt and subject to the same license.
|
||||
|
||||
m1n1 embeds [minlzma](https://github.com/ionescu007/minlzma), which is
|
||||
[MIT](3rdparty_licenses/LICENSE.minlzma) licensed and copyright:
|
||||
|
||||
* Copyright (c) 2020 Alex Ionescu
|
||||
|
||||
m1n1 embeds a slightly modified version of [tinf](https://github.com/jibsen/tinf), which is
|
||||
[ZLIB](3rdparty_licenses/LICENSE.tinf) licensed and copyright:
|
||||
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
|
|
|
@ -290,11 +290,14 @@ class M1N1Proxy:
|
|||
P_DC_CVAU = 0x309
|
||||
P_DC_CIVAC = 0x30a
|
||||
|
||||
P_XZDEC = 0x400
|
||||
P_GZDEC = 0x401
|
||||
|
||||
def __init__(self, iface, debug=False):
|
||||
self.debug = debug
|
||||
self.iface = iface
|
||||
|
||||
def request(self, opcode, *args, reboot=False):
|
||||
def request(self, opcode, *args, reboot=False, signed=False):
|
||||
if len(args) > 6:
|
||||
raise ValueError("Too many arguments")
|
||||
args = list(args) + [0] * (6 - len(args))
|
||||
|
@ -302,7 +305,8 @@ class M1N1Proxy:
|
|||
if self.debug:
|
||||
print("<<<< %08x: %08x %08x %08x %08x %08x %08x"%tuple([opcode] + args))
|
||||
reply = self.iface.proxyreq(req, reboot=reboot)
|
||||
rop, status, retval = struct.unpack("<QqQ", reply)
|
||||
ret_fmt = "q" if signed else "Q"
|
||||
rop, status, retval = struct.unpack("<Qq" + ret_fmt, reply)
|
||||
if self.debug:
|
||||
print(">>>> %08x: %d %08x"%(rop, status, retval))
|
||||
if reboot:
|
||||
|
@ -467,6 +471,14 @@ class M1N1Proxy:
|
|||
def dc_civac(self, addr, size):
|
||||
self.request(self.P_DC_CIVAC, addr, size)
|
||||
|
||||
def xzdec(self, inbuf, insize, outbuf=0, outsize=0):
|
||||
return self.request(self.P_XZDEC, inbuf, insize, outbuf,
|
||||
outsize, signed=True)
|
||||
|
||||
def gzdec(self, inbuf, insize, outbuf, outsize):
|
||||
return self.request(self.P_GZDEC, inbuf, insize, outbuf,
|
||||
outsize, signed=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import serial
|
||||
uartdev = os.environ.get("M1N1DEVICE", "/dev/ttyUSB0")
|
||||
|
|
155
src/minilzlib/dictbuf.c
Normal file
155
src/minilzlib/dictbuf.c
Normal file
|
@ -0,0 +1,155 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
dictbuf.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements the management of the LZMA "history buffer" which is
|
||||
often called the "dictionary". Routines for writing into the history buffer
|
||||
as well as for reading back from it are implemented, as well as mechanisms
|
||||
for repeating previous symbols forward into the dictionary. This forms the
|
||||
basis for LZMA match distance-length pairs that are found and decompressed.
|
||||
Note that for simplicity's sake, the dictionary is stored directly in the
|
||||
output buffer, such that no "flushing" or copying is needed back and forth.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
|
||||
//
|
||||
// State used for the history buffer (dictionary)
|
||||
//
|
||||
typedef struct _DICTIONARY_STATE
|
||||
{
|
||||
//
|
||||
// Buffer, start position, current position, and offset limit in the buffer
|
||||
//
|
||||
uint8_t* Buffer;
|
||||
uint32_t BufferSize;
|
||||
uint32_t Start;
|
||||
uint32_t Offset;
|
||||
uint32_t Limit;
|
||||
} DICTIONARY_STATE, *PDICTIONARY_STATE;
|
||||
DICTIONARY_STATE Dictionary;
|
||||
|
||||
void
|
||||
DtInitialize (
|
||||
uint8_t* HistoryBuffer,
|
||||
uint32_t Size
|
||||
)
|
||||
{
|
||||
//
|
||||
// Initialize the buffer and reset the position
|
||||
//
|
||||
Dictionary.Buffer = HistoryBuffer;
|
||||
Dictionary.Offset = 0;
|
||||
Dictionary.BufferSize = Size;
|
||||
}
|
||||
|
||||
bool
|
||||
DtSetLimit (
|
||||
uint32_t Limit
|
||||
)
|
||||
{
|
||||
//
|
||||
// Make sure that the passed in dictionary limit fits within the size, and
|
||||
// then set this as the new limit. Save the starting point (current offset)
|
||||
//
|
||||
if ((Dictionary.Offset + Limit) > Dictionary.BufferSize)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
Dictionary.Limit = Dictionary.Offset + Limit;
|
||||
Dictionary.Start = Dictionary.Offset;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
DtIsComplete (
|
||||
uint32_t* BytesProcessed
|
||||
)
|
||||
{
|
||||
//
|
||||
// Return bytes processed and if the dictionary has been fully written to
|
||||
//
|
||||
*BytesProcessed = Dictionary.Offset - Dictionary.Start;
|
||||
return (Dictionary.Offset == Dictionary.Limit);
|
||||
}
|
||||
|
||||
bool
|
||||
DtCanWrite (
|
||||
uint32_t* Position
|
||||
)
|
||||
{
|
||||
//
|
||||
// Return our position and make sure it's not beyond the uncompressed size
|
||||
//
|
||||
*Position = Dictionary.Offset;
|
||||
return (Dictionary.Offset < Dictionary.Limit);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
DtGetSymbol (
|
||||
uint32_t Distance
|
||||
)
|
||||
{
|
||||
//
|
||||
// If the dictionary is still empty, just return 0, otherwise, return the
|
||||
// symbol that is Distance bytes backward.
|
||||
//
|
||||
if (Distance > Dictionary.Offset)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return Dictionary.Buffer[Dictionary.Offset - Distance];
|
||||
}
|
||||
|
||||
void
|
||||
DtPutSymbol (
|
||||
uint8_t Symbol
|
||||
)
|
||||
{
|
||||
//
|
||||
// Write the symbol and advance our position
|
||||
//
|
||||
Dictionary.Buffer[Dictionary.Offset++] = Symbol;
|
||||
}
|
||||
|
||||
bool
|
||||
DtRepeatSymbol (
|
||||
uint32_t Length,
|
||||
uint32_t Distance
|
||||
)
|
||||
{
|
||||
//
|
||||
// Make sure we never get asked to write past the end of the dictionary. We
|
||||
// should also not allow the distance to go beyond the current offset since
|
||||
// DtGetSymbol will return 0 thinking the dictionary is empty.
|
||||
//
|
||||
if (((Length + Dictionary.Offset) > Dictionary.Limit) ||
|
||||
(Distance > Dictionary.Offset))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Now rewrite the stream of past symbols forward into the dictionary.
|
||||
//
|
||||
do
|
||||
{
|
||||
DtPutSymbol(DtGetSymbol(Distance));
|
||||
} while (--Length > 0);
|
||||
return true;
|
||||
}
|
136
src/minilzlib/inputbuf.c
Normal file
136
src/minilzlib/inputbuf.c
Normal file
|
@ -0,0 +1,136 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
inputbuf.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements helper functions for managing the input buffer that
|
||||
contains arithmetic-coded LZ77 match distance-length pairs and raw literals
|
||||
Both seeking (such that an external reader can refer to multiple bytes) and
|
||||
reading (capturing) an individual byte are supported. Support for aligning
|
||||
input data to 4 bytes (which is a requirement for XZ-encoded files) is also
|
||||
implemented.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
|
||||
//
|
||||
// Input Buffer State
|
||||
//
|
||||
typedef struct _BUFFER_STATE
|
||||
{
|
||||
//
|
||||
// Start of the buffer, current offset, current packet end, and total input size
|
||||
//
|
||||
uint8_t* Buffer;
|
||||
uint32_t Offset;
|
||||
uint32_t SoftLimit;
|
||||
uint32_t Size;
|
||||
} BUFFER_STATE, * PBUFFER_STATE;
|
||||
BUFFER_STATE In;
|
||||
|
||||
bool
|
||||
BfAlign (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t padByte;
|
||||
//
|
||||
// Keep reading until we reach 32-bit alignment. All bytes must be zero.
|
||||
//
|
||||
while (In.Offset & 3)
|
||||
{
|
||||
if (!BfRead(&padByte) || (padByte != 0))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
BfSetSoftLimit (
|
||||
uint32_t Remaining
|
||||
)
|
||||
{
|
||||
if ((In.Size - In.Offset) < Remaining)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
In.SoftLimit = In.Offset + Remaining;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
BfResetSoftLimit (
|
||||
void
|
||||
)
|
||||
{
|
||||
In.SoftLimit = In.Size;
|
||||
}
|
||||
|
||||
bool
|
||||
BfSeek (
|
||||
uint32_t Length,
|
||||
uint8_t** Bytes
|
||||
)
|
||||
{
|
||||
//
|
||||
// Make sure the input buffer has enough space to seek the desired size, if
|
||||
// it does, return the current position and then seek past the desired size
|
||||
//
|
||||
if ((In.Offset + Length) > In.SoftLimit)
|
||||
{
|
||||
*Bytes = 0;
|
||||
return false;
|
||||
}
|
||||
*Bytes = &In.Buffer[In.Offset];
|
||||
In.Offset += Length;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
BfRead (
|
||||
uint8_t* Byte
|
||||
)
|
||||
{
|
||||
uint8_t* pByte;
|
||||
//
|
||||
// Seek past the byte and read it
|
||||
//
|
||||
if (!BfSeek(sizeof(*Byte), &pByte))
|
||||
{
|
||||
*Byte = 0;
|
||||
return false;
|
||||
}
|
||||
*Byte = *pByte;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
BfInitialize (
|
||||
uint8_t* InputBuffer,
|
||||
uint32_t InputSize
|
||||
)
|
||||
{
|
||||
//
|
||||
// Save all the data in the context buffer state
|
||||
//
|
||||
In.Buffer = InputBuffer;
|
||||
In.Size = InputSize;
|
||||
In.SoftLimit = InputSize;
|
||||
In.Offset = 0;
|
||||
}
|
228
src/minilzlib/lzma2dec.c
Normal file
228
src/minilzlib/lzma2dec.c
Normal file
|
@ -0,0 +1,228 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
lzma2dec.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements the LZMA2 decoding logic responsible for parsing the
|
||||
LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
|
||||
Size), and the Property Byte during the initial Dictionary Reset. Note that
|
||||
this module only implements support for a single such reset (i.e.: archives
|
||||
in "solid" mode).
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
#include "lzma2dec.h"
|
||||
|
||||
bool
|
||||
Lz2DecodeChunk (
|
||||
uint32_t* BytesProcessed,
|
||||
uint32_t RawSize,
|
||||
uint16_t CompressedSize
|
||||
)
|
||||
{
|
||||
uint32_t bytesProcessed;
|
||||
|
||||
//
|
||||
// Go and decode this chunk, sequence by sequence
|
||||
//
|
||||
if (!LzDecode())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// In a correctly formatted stream, the last arithmetic-coded sequence must
|
||||
// be zero once we finished with the last chunk. Make sure the stream ended
|
||||
// exactly where we expected it to.
|
||||
//
|
||||
if (!RcIsComplete(&bytesProcessed) || (bytesProcessed != CompressedSize))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// The entire output stream must have been written to, and the dictionary
|
||||
// must be full now.
|
||||
//
|
||||
if (!DtIsComplete(&bytesProcessed) || (bytesProcessed != RawSize))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
*BytesProcessed += bytesProcessed;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Lz2DecodeStream (
|
||||
uint32_t* BytesProcessed,
|
||||
bool GetSizeOnly
|
||||
)
|
||||
{
|
||||
uint8_t* inBytes;
|
||||
LZMA2_CONTROL_BYTE controlByte;
|
||||
uint8_t propertyByte;
|
||||
uint32_t rawSize;
|
||||
uint16_t compressedSize;
|
||||
|
||||
//
|
||||
// Read the first control byte
|
||||
//
|
||||
*BytesProcessed = 0;
|
||||
while (BfRead(&controlByte.Value))
|
||||
{
|
||||
//
|
||||
// When the LZMA2 control byte is 0, the entire stream is decoded. This
|
||||
// is the only success path out of this function.
|
||||
//
|
||||
if (controlByte.Value == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// Read the appropriate number of info bytes based on the stream type.
|
||||
//
|
||||
if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// For LZMA streams calculate both the uncompressed and compressed size
|
||||
// from the info bytes. Uncompressed streams only have the former.
|
||||
//
|
||||
if (controlByte.u.Common.IsLzma == 1)
|
||||
{
|
||||
rawSize = controlByte.u.Lzma.RawSize << 16;
|
||||
compressedSize = inBytes[2] << 8;
|
||||
compressedSize += inBytes[3] + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
rawSize = 0;
|
||||
compressedSize = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Make sure that the output buffer that was supplied is big enough to
|
||||
// fit the uncompressed chunk, unless we're just calculating the size.
|
||||
//
|
||||
rawSize += inBytes[0] << 8;
|
||||
rawSize += inBytes[1] + 1;
|
||||
if (!GetSizeOnly && !DtSetLimit(rawSize))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// Check if the full LZMA state needs to be reset, which must happen at
|
||||
// the start of stream. Also check for a property reset, which occurs
|
||||
// when an LZMA stream follows an uncompressed stream. Separately,
|
||||
// check for a state reset without a property byte (happens rarely,
|
||||
// but does happen in a few compressed streams).
|
||||
//
|
||||
if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
|
||||
(controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
|
||||
{
|
||||
//
|
||||
// Read the LZMA properties and then initialize the decoder.
|
||||
//
|
||||
if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
|
||||
{
|
||||
LzResetState();
|
||||
}
|
||||
//
|
||||
// else controlByte.u.Lzma.ResetState == Lzma2NoReset, since a two-bit
|
||||
// field only has four possible values
|
||||
//
|
||||
|
||||
//
|
||||
// Don't do any decompression if the caller only wants to know the size
|
||||
//
|
||||
if (GetSizeOnly)
|
||||
{
|
||||
*BytesProcessed += rawSize;
|
||||
BfSeek((controlByte.u.Common.IsLzma == 1) ? compressedSize : rawSize,
|
||||
&inBytes);
|
||||
continue;
|
||||
}
|
||||
else if (controlByte.u.Common.IsLzma == 0)
|
||||
{
|
||||
//
|
||||
// Seek to the requested size in the input buffer
|
||||
//
|
||||
if (!BfSeek(rawSize, &inBytes))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Copy the data into the dictionary as-is
|
||||
//
|
||||
for (uint32_t i = 0; i < rawSize; i++)
|
||||
{
|
||||
DtPutSymbol(inBytes[i]);
|
||||
}
|
||||
|
||||
//
|
||||
// Update bytes and keep going to the next chunk
|
||||
//
|
||||
*BytesProcessed += rawSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// Record how many bytes are left in this sequence as our SoftLimit for
|
||||
// the other operations. This allows us to omit most range checking
|
||||
// logic in rangedec.c. This soft limit lasts until reset below.
|
||||
//
|
||||
if (!BfSetSoftLimit(compressedSize))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// Read the initial range and code bytes to initialize the arithmetic
|
||||
// coding decoder, and let it know how much input data exists. We've
|
||||
// already validated that this much space exists in the input buffer.
|
||||
//
|
||||
if (!RcInitialize(&compressedSize))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// Start decoding the LZMA sequences in this chunk
|
||||
//
|
||||
if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
//
|
||||
// Having decoded that chunk, reset our soft limit (to the full
|
||||
// input stream) so we can read the next chunk.
|
||||
//
|
||||
BfResetSoftLimit();
|
||||
}
|
||||
return false;
|
||||
}
|
92
src/minilzlib/lzma2dec.h
Normal file
92
src/minilzlib/lzma2dec.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
lzma2dec.h
|
||||
|
||||
Abstract:
|
||||
|
||||
This header file contains C-style data structures and enumerations that map
|
||||
back to the LZMA2 standard. This includes the encoding of the LZMA2 Control
|
||||
Byte and the possible LZMA2 Reset States.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
#pragma warning(disable:4214)
|
||||
|
||||
//
|
||||
// The most complex LZMA sequence possible is a "match" sequence where the
|
||||
// the length is > 127 bytes, and the distance is > 127 bytes. This type of
|
||||
// sequence starts with {1,1} for "match", followed by {1,1,nnnnnnnn} for
|
||||
// "8-bit encoded length", followed by {1,1,1,1,1,1} to select the distance
|
||||
// slot (63). That's 18 bits so far, which all come from arithmetic-coded
|
||||
// bit trees with various probabilities. The next 26 bits are going to be
|
||||
// fixed-probability, meaning that the bit tree is mathematically hardcoded
|
||||
// at 50%. Finally, there are the last 4 "align" distance bits which also
|
||||
// come from an arithmetic-coded bit tree, bringing the total such bits to
|
||||
// 22.
|
||||
//
|
||||
// Each time we have to "normalize" the arithmetic coder, it consumes an
|
||||
// additional byte. Normalization is done whenever we consume more than 8
|
||||
// of the high bits of the coder's range (i.e.: below 2^24), so exactly
|
||||
// every 8 direct bits (which always halve the range due to their 50%).
|
||||
// The other bits can have arbitrary probabilities, but in the worst case
|
||||
// we need to normalize the range every n bits. As such, this is a total of
|
||||
// 20 worst-case normalization per LZMA sequence. Finally, we do one last
|
||||
// normalization at the end of LzDecode, to make sure that the decoder is
|
||||
// always in a normalized state. This means that a compressed chunk should
|
||||
// be at least 21 bytes if we want to guarantee that LzDecode can never
|
||||
// read past the current input stream, and avoid range checking.
|
||||
//
|
||||
#define LZMA_MAX_SEQUENCE_SIZE 21
|
||||
|
||||
//
|
||||
// This describes the different ways an LZMA2 control byte can request a reset
|
||||
//
|
||||
typedef enum _LZMA2_COMPRESSED_RESET_STATE
|
||||
{
|
||||
Lzma2NoReset = 0,
|
||||
Lzma2SimpleReset = 1,
|
||||
Lzma2PropertyReset = 2,
|
||||
Lzma2FullReset = 3
|
||||
} LZMA2_COMPRESSED_RESET_STATE;
|
||||
|
||||
//
|
||||
// This describes how an LZMA2 control byte can be parsed
|
||||
//
|
||||
typedef union _LZMA2_CONTROL_BYTE
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t ResetState : 2;
|
||||
uint8_t Reserved : 5;
|
||||
uint8_t IsLzma : 1;
|
||||
} Raw;
|
||||
struct
|
||||
{
|
||||
uint8_t RawSize : 5;
|
||||
uint8_t ResetState : 2;
|
||||
uint8_t IsLzma : 1;
|
||||
} Lzma;
|
||||
struct
|
||||
{
|
||||
uint8_t : 7;
|
||||
uint8_t IsLzma : 1;
|
||||
} Common;
|
||||
} u;
|
||||
uint8_t Value;
|
||||
} LZMA2_CONTROL_BYTE;
|
||||
static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");
|
627
src/minilzlib/lzmadec.c
Normal file
627
src/minilzlib/lzmadec.c
Normal file
|
@ -0,0 +1,627 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
lzmadec.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements the LZMA Decoding Logic responsible for decoding the
|
||||
three possible types of LZMA "packets": matches, repetitions (short \& long)
|
||||
and literals. The probability model for each type of packet is also stored
|
||||
in this file, along with the management of the previously seen packet types
|
||||
(which is tracked as the "sequence").
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
#include "lzmadec.h"
|
||||
|
||||
//
|
||||
// Probability Bit Model for Lenghts in Rep and in Match sequences
|
||||
//
|
||||
typedef struct _LENGTH_DECODER_STATE
|
||||
{
|
||||
//
|
||||
// Bit Model for the choosing the type of length encoding
|
||||
//
|
||||
uint16_t Choice;
|
||||
uint16_t Choice2;
|
||||
//
|
||||
// Bit Model for each of the length encodings
|
||||
//
|
||||
uint16_t Low[LZMA_POSITION_COUNT][LZMA_MAX_LOW_LENGTH];
|
||||
uint16_t Mid[LZMA_POSITION_COUNT][LZMA_MAX_MID_LENGTH];
|
||||
uint16_t High[LZMA_MAX_HIGH_LENGTH];
|
||||
} LENGTH_DECODER_STATE, * PLENGTH_DECODER_STATE;
|
||||
|
||||
//
|
||||
// State used for LZMA decoding
|
||||
//
|
||||
typedef struct _DECODER_STATE
|
||||
{
|
||||
//
|
||||
// Current type of sequence last decoded
|
||||
//
|
||||
LZMA_SEQUENCE_STATE Sequence;
|
||||
//
|
||||
// History of last 4 decoded distances
|
||||
//
|
||||
uint32_t Rep0;
|
||||
uint32_t Rep1;
|
||||
uint32_t Rep2;
|
||||
uint32_t Rep3;
|
||||
//
|
||||
// Pending length to repeat from dictionary
|
||||
//
|
||||
uint32_t Len;
|
||||
//
|
||||
// Probability Bit Models for all sequence types
|
||||
//
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
//
|
||||
// Literal model
|
||||
//
|
||||
uint16_t Literal[LZMA_LITERAL_CODERS][LZMA_LC_MODEL_SIZE];
|
||||
//
|
||||
// Last-used-distance based models
|
||||
//
|
||||
uint16_t Rep[LzmaMaxState];
|
||||
uint16_t Rep0[LzmaMaxState];
|
||||
uint16_t Rep0Long[LzmaMaxState][LZMA_POSITION_COUNT];
|
||||
uint16_t Rep1[LzmaMaxState];
|
||||
uint16_t Rep2[LzmaMaxState];
|
||||
LENGTH_DECODER_STATE RepLen;
|
||||
//
|
||||
// Explicit distance match based models
|
||||
//
|
||||
uint16_t Match[LzmaMaxState][LZMA_POSITION_COUNT];
|
||||
uint16_t DistSlot[LZMA_FIRST_CONTEXT_DISTANCE_SLOT][LZMA_DISTANCE_SLOTS];
|
||||
uint16_t Dist[(1 << 7) - LZMA_FIRST_FIXED_DISTANCE_SLOT];
|
||||
uint16_t Align[LZMA_DISTANCE_ALIGN_SLOTS];
|
||||
LENGTH_DECODER_STATE MatchLen;
|
||||
} BitModel;
|
||||
uint16_t RawProbabilities[LZMA_BIT_MODEL_SLOTS];
|
||||
} u;
|
||||
} DECODER_STATE, *PDECODER_STATE;
|
||||
DECODER_STATE Decoder;
|
||||
|
||||
//
|
||||
// LZMA decoding uses 3 "properties" which determine how the probability
|
||||
// bit model will be laid out. These store the number of bits that are used
|
||||
// to pick the correct Literal Coder ("lc"), the number of Position bits to
|
||||
// select the Literal coder ("lp"), and the number of Position Bits used to
|
||||
// select various lengths ("pb"). In LZMA2, these properties are encoded in
|
||||
// a single byte with the formula: ((pb * 45) + lp * 9) + lc).
|
||||
//
|
||||
// We only support the default {lc = 3, lp = 0, pb = 2} properties, which
|
||||
// are what the main encoders out there use. This means that a total of 2
|
||||
// bits will be used for arithmetic-coded bit trees that are dependent on
|
||||
// the current position, and that a total of 3 bits will be used when we
|
||||
// pick the arithmetic-coded bit tree used for literal coding. The 0 means
|
||||
// this selection will _not_ be dependent on the position in the buffer.
|
||||
//
|
||||
const uint8_t k_LzSupportedProperties =
|
||||
(LZMA_PB * 45) + (LZMA_LP * 9) + (LZMA_LC);
|
||||
|
||||
void
|
||||
LzSetLiteral (
|
||||
PLZMA_SEQUENCE_STATE State
|
||||
)
|
||||
{
|
||||
if (*State <= LzmaLitShortrepLitLitState)
|
||||
{
|
||||
//
|
||||
// States 0-3 represent packets with at least 2 back-to-back literals,
|
||||
// so another literal now takes us to state 0 (3 back-to-back literals)
|
||||
//
|
||||
*State = LzmaLitLitLitState;
|
||||
}
|
||||
else if (*State <= LzmaLitShortrepState)
|
||||
{
|
||||
//
|
||||
// States 4-6 represent packets with a literal at the end, so seeing
|
||||
// another literal now takes us to 2 back-to-back literals, which are
|
||||
// state packets 1-3.
|
||||
//
|
||||
// States 7-9 represent packets with a literal at the start, followed
|
||||
// by a match/rep/shortrep. Seeing another literal now drops this first
|
||||
// literal and takes us to having a literal at the end, which are state
|
||||
// packets 4-6 that we just described in the paragraph above.
|
||||
//
|
||||
*State = (LZMA_SEQUENCE_STATE)(*State - 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// Finally, state 10 and 11 represent cases without a single literal in
|
||||
// the last 2 sequence packets, so seeing a literal now takes us to a
|
||||
// "literal at the end" state, either following a match or a rep.
|
||||
//
|
||||
*State = (LZMA_SEQUENCE_STATE)(*State - 6);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
LzIsLiteral (
|
||||
LZMA_SEQUENCE_STATE State
|
||||
)
|
||||
{
|
||||
//
|
||||
// States 0-6 describe literal packet sequences
|
||||
//
|
||||
return State < LzmaMaxLitState;
|
||||
}
|
||||
|
||||
void
|
||||
LzSetMatch (
|
||||
PLZMA_SEQUENCE_STATE State
|
||||
)
|
||||
{
|
||||
//
|
||||
// Move to the appropriate "match" state based on current literal state
|
||||
//
|
||||
*State = LzIsLiteral(*State) ? LzmaLitMatchState : LzmaNonlitMatchState;
|
||||
}
|
||||
|
||||
void
|
||||
LzSetLongRep (
|
||||
PLZMA_SEQUENCE_STATE State
|
||||
)
|
||||
{
|
||||
//
|
||||
// Move to the appropriate "long rep" state based on current literal state
|
||||
//
|
||||
*State = LzIsLiteral(*State) ? LzmaLitRepState : LzmaNonlitRepState;
|
||||
}
|
||||
|
||||
void
|
||||
LzSetShortRep (
|
||||
PLZMA_SEQUENCE_STATE State
|
||||
)
|
||||
{
|
||||
//
|
||||
// Move to the appropriate "short rep" state based on current literal state
|
||||
//
|
||||
*State = LzIsLiteral(*State) ? LzmaLitShortrepState : LzmaNonlitRepState;
|
||||
}
|
||||
|
||||
uint16_t*
|
||||
LzGetLiteralSlot (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t symbol;
|
||||
|
||||
//
|
||||
// To pick the correct literal coder arithmetic-coded bit tree, LZMA uses
|
||||
// the "lc" parameter to choose the number of high bits from the previous
|
||||
// symbol (in the normal case, 3). It then combines that with the "lp"
|
||||
// parameter to choose the number of low bits from the current position in
|
||||
// the dictionary. However, since "lp" is normally 0, we can omit this.
|
||||
//
|
||||
symbol = DtGetSymbol(1);
|
||||
return Decoder.u.BitModel.Literal[symbol >> (8 - LZMA_LC)];
|
||||
}
|
||||
|
||||
uint16_t*
|
||||
LzGetDistSlot (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t slotIndex;
|
||||
|
||||
//
|
||||
// There are 4 different arithmetic-coded bit trees which are used to pick
|
||||
// the correct "distance slot" when doing match distance decoding. Each of
|
||||
// them is used based on the length of the symbol that is being repeated.
|
||||
// For lengths of 2, 3, 4 bytes, a dedicated set of distance slots is used.
|
||||
// For lengths of 5 bytes or above, a shared set of distance slots is used.
|
||||
//
|
||||
if (Decoder.Len < (LZMA_FIRST_CONTEXT_DISTANCE_SLOT + LZMA_MIN_LENGTH))
|
||||
{
|
||||
slotIndex = (uint8_t)(Decoder.Len - LZMA_MIN_LENGTH);
|
||||
}
|
||||
else
|
||||
{
|
||||
slotIndex = LZMA_FIRST_CONTEXT_DISTANCE_SLOT - 1;
|
||||
}
|
||||
return Decoder.u.BitModel.DistSlot[slotIndex];
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeLiteral (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint16_t* probArray;
|
||||
uint8_t symbol, matchByte;
|
||||
|
||||
//
|
||||
// First, choose the correct arithmetic-coded bit tree (which is based on
|
||||
// the last symbol we just decoded), then see if we last decoded a literal.
|
||||
//
|
||||
// If so, simply get the symbol from the bit tree as normal. However, if
|
||||
// we didn't last see a literal, we need to read the "match byte" that is
|
||||
// "n" bytes away from the last decoded match. We previously stored this in
|
||||
// rep0.
|
||||
//
|
||||
// Based on this match byte, we'll then use 2 other potential bit trees,
|
||||
// see LzDecodeMatched for more information.
|
||||
//
|
||||
probArray = LzGetLiteralSlot();
|
||||
if (LzIsLiteral(Decoder.Sequence))
|
||||
{
|
||||
|
||||
symbol = RcGetBitTree(probArray, (1 << 8));
|
||||
}
|
||||
else
|
||||
{
|
||||
matchByte = DtGetSymbol(Decoder.Rep0 + 1);
|
||||
symbol = RcDecodeMatchedBitTree(probArray, matchByte);
|
||||
}
|
||||
|
||||
//
|
||||
// Write the symbol and indicate that the last sequence was a literal
|
||||
//
|
||||
DtPutSymbol(symbol);
|
||||
LzSetLiteral(&Decoder.Sequence);
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeLen (
|
||||
PLENGTH_DECODER_STATE LenState,
|
||||
uint8_t PosBit
|
||||
)
|
||||
{
|
||||
uint16_t* probArray;
|
||||
uint16_t limit;
|
||||
|
||||
//
|
||||
// Lenghts of 2 and higher are encoded in 3 possible types of arithmetic-
|
||||
// coded bit trees, depending on the size of the length.
|
||||
//
|
||||
// Lengths 2-9 are encoded in trees called "Low" using 3 bits of data.
|
||||
// Lengths 10-17 are encoded in trees called "Mid" using 3 bits of data.
|
||||
// Lengths 18-273 are encoded in a tree called "high" using 8 bits of data.
|
||||
//
|
||||
// The appropriate "Low" or "Mid" tree is selected based on the bottom 2
|
||||
// position bits (0-3) (in the LZMA standard, this is based on the "pb",
|
||||
// while the "High" tree is shared for all positions.
|
||||
//
|
||||
// Two arithmetic-coded bit trees, called "Choice" and "Choice2" tell us
|
||||
// the type of Length, so we can choose the right tree. {0, n} tells us
|
||||
// to use the Low trees, while {1, 0} tells us to use the Mid trees. Lastly
|
||||
// {1, 1} tells us to use the High tree.
|
||||
//
|
||||
Decoder.Len = LZMA_MIN_LENGTH;
|
||||
if (RcIsBitSet(&LenState->Choice))
|
||||
{
|
||||
if (RcIsBitSet(&LenState->Choice2))
|
||||
{
|
||||
probArray = LenState->High;
|
||||
limit = LZMA_MAX_HIGH_LENGTH;
|
||||
Decoder.Len += LZMA_MAX_LOW_LENGTH + LZMA_MAX_MID_LENGTH;
|
||||
}
|
||||
else
|
||||
{
|
||||
probArray = LenState->Mid[PosBit];
|
||||
limit = LZMA_MAX_MID_LENGTH;
|
||||
Decoder.Len += LZMA_MAX_LOW_LENGTH;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
probArray = LenState->Low[PosBit];
|
||||
limit = LZMA_MAX_LOW_LENGTH;
|
||||
}
|
||||
Decoder.Len += RcGetBitTree(probArray, limit);
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeMatch (
|
||||
uint8_t PosBit
|
||||
)
|
||||
{
|
||||
uint16_t* probArray;
|
||||
uint8_t distSlot, distBits;
|
||||
|
||||
//
|
||||
// Decode the length component of the "match" sequence. Then, since we're
|
||||
// about to decode a new distance, update our history by one level.
|
||||
//
|
||||
LzDecodeLen(&Decoder.u.BitModel.MatchLen, PosBit);
|
||||
Decoder.Rep3 = Decoder.Rep2;
|
||||
Decoder.Rep2 = Decoder.Rep1;
|
||||
Decoder.Rep1 = Decoder.Rep0;
|
||||
|
||||
//
|
||||
// Read the first 6 bits, which make up the "distance slot"
|
||||
//
|
||||
probArray = LzGetDistSlot();
|
||||
distSlot = RcGetBitTree(probArray, LZMA_DISTANCE_SLOTS);
|
||||
if (distSlot < LZMA_FIRST_CONTEXT_DISTANCE_SLOT)
|
||||
{
|
||||
//
|
||||
// Slots 0-3 directly encode the distance as a literal number
|
||||
//
|
||||
Decoder.Rep0 = distSlot;
|
||||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// For slots 4-13, figure out how many "context encoded bits" are used
|
||||
// to encode this distance. The math works out such that slots 4-5 use
|
||||
// 1 bit, 6-7 use 2 bits, 8-9 use 3 bits, and so on and so forth until
|
||||
// slots 12-13 which use 5 bits.
|
||||
//
|
||||
// This gives us anywhere from 1-5 bits, plus the two upper bits which
|
||||
// can either be 0b10 or 0b11 (based on the bottom bit of the distance
|
||||
// slot). Thus, with the context encoded bits, we can represent lengths
|
||||
// anywhere from 0b10[0] to 0b11[11111] (i.e.: 4-127).
|
||||
//
|
||||
// For slots 14-63, we use "fixed 50% probability bits" which are also
|
||||
// called "direct bits". The formula below also tells us how many such
|
||||
// direct bits to use in this scenario. In other words, distBits can
|
||||
// either be the number of "context encoded bits" for slots 4-13, or it
|
||||
// can be the the number of "direct bits" for slots 14-63. This gives
|
||||
// us a range of of 2 to 26 bits, which are then used as middle bits.
|
||||
// Finally, the last 4 bits are called the "align" bits. The smallest
|
||||
// possible number we can encode is now going to be 0b10[00][0000] and
|
||||
// the highest is 0b11[1111111111111111111111111][1111], in other words
|
||||
// 128 to (2^31)-1.
|
||||
//
|
||||
distBits = (distSlot >> 1) - 1;
|
||||
Decoder.Rep0 = (0b10 | (distSlot & 1)) << distBits;
|
||||
|
||||
//
|
||||
// Slots 4-13 have their own arithmetic-coded reverse bit trees. Slots
|
||||
// 14-63 encode the middle "direct bits" with fixed 50% probability and
|
||||
// the bottom 4 "align bits" with a shared arithmetic-coded reverse bit
|
||||
// tree.
|
||||
//
|
||||
if (distSlot < LZMA_FIRST_FIXED_DISTANCE_SLOT)
|
||||
{
|
||||
probArray = &Decoder.u.BitModel.Dist[Decoder.Rep0 - distSlot];
|
||||
}
|
||||
else
|
||||
{
|
||||
Decoder.Rep0 |= RcGetFixed(distBits - LZMA_DISTANCE_ALIGN_BITS) <<
|
||||
LZMA_DISTANCE_ALIGN_BITS;
|
||||
distBits = LZMA_DISTANCE_ALIGN_BITS;
|
||||
probArray = Decoder.u.BitModel.Align;
|
||||
}
|
||||
Decoder.Rep0 |= RcGetReverseBitTree(probArray, distBits);
|
||||
}
|
||||
|
||||
//
|
||||
// Indicate that the last sequence was a "match"
|
||||
//
|
||||
LzSetMatch(&Decoder.Sequence);
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeRepLen (
|
||||
uint8_t PosBit,
|
||||
bool IsLongRep
|
||||
)
|
||||
{
|
||||
//
|
||||
// Decode the length byte and indicate the last sequence was a "rep".
|
||||
// If this is a short rep, then the length is always hard-coded to 1.
|
||||
//
|
||||
if (IsLongRep)
|
||||
{
|
||||
LzDecodeLen(&Decoder.u.BitModel.RepLen, PosBit);
|
||||
LzSetLongRep(&Decoder.Sequence);
|
||||
}
|
||||
else
|
||||
{
|
||||
Decoder.Len = 1;
|
||||
LzSetShortRep(&Decoder.Sequence);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeRep0(
|
||||
uint8_t PosBit
|
||||
)
|
||||
{
|
||||
uint8_t bit;
|
||||
|
||||
//
|
||||
// This could be a "short rep" with a length of 1, or a "long rep0" with
|
||||
// a length that we have to decode. The next bit tells us this, using the
|
||||
// arithmetic-coded bit trees stored in "Rep0Long", with 1 tree for each
|
||||
// position bit (0-3).
|
||||
//
|
||||
bit = RcIsBitSet(&Decoder.u.BitModel.Rep0Long[Decoder.Sequence][PosBit]);
|
||||
LzDecodeRepLen(PosBit, bit);
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeLongRep (
|
||||
uint8_t PosBit
|
||||
)
|
||||
{
|
||||
uint32_t newRep;
|
||||
|
||||
//
|
||||
// Read the next 2 bits to figure out which of the recently used distances
|
||||
// we should use for this match. The following three states are possible :
|
||||
//
|
||||
// {0,n} - "Long rep1", where the length is stored in an arithmetic-coded
|
||||
// bit tree, and the distance is the 2nd most recently used distance (Rep1)
|
||||
//
|
||||
// {1,0} - "Long rep2", where the length is stored in an arithmetic-coded
|
||||
// bit tree, and the distance is the 3rd most recently used distance (Rep2)
|
||||
//
|
||||
// {1,1} - "Long rep3", where the length is stored in an arithmetic-coded
|
||||
// bit tree, and the distance is the 4th most recently used distance (Rep3)
|
||||
//
|
||||
// Once we have the right one, we must slide down each previously recently
|
||||
// used distance, so that the distance we're now using (Rep1, Rep2 or Rep3)
|
||||
// becomes "Rep0" again.
|
||||
//
|
||||
if (RcIsBitSet(&Decoder.u.BitModel.Rep1[Decoder.Sequence]))
|
||||
{
|
||||
if (RcIsBitSet(&Decoder.u.BitModel.Rep2[Decoder.Sequence]))
|
||||
{
|
||||
newRep = Decoder.Rep3;
|
||||
Decoder.Rep3 = Decoder.Rep2;
|
||||
}
|
||||
else
|
||||
{
|
||||
newRep = Decoder.Rep2;
|
||||
}
|
||||
Decoder.Rep2 = Decoder.Rep1;
|
||||
}
|
||||
else
|
||||
{
|
||||
newRep = Decoder.Rep1;
|
||||
}
|
||||
Decoder.Rep1 = Decoder.Rep0;
|
||||
Decoder.Rep0 = newRep;
|
||||
LzDecodeRepLen(PosBit, true);
|
||||
}
|
||||
|
||||
void
|
||||
LzDecodeRep (
|
||||
uint8_t PosBit
|
||||
)
|
||||
{
|
||||
//
|
||||
// We know this is an LZ77 distance-length pair where the distance is based
|
||||
// on a history of up to 4 previously used distance (Rep0-3). To know which
|
||||
// distance to use, the following 5 bit positions are possible (keeping in
|
||||
// mind that we've already decoded the first 2 bits {1,1} in LzDecode which
|
||||
// got us here in the first place):
|
||||
//
|
||||
// {0,0} - "Short rep", where the length is always 1 and distance is always
|
||||
// the most recently used distance (Rep0).
|
||||
//
|
||||
// {0,1} - "Long rep0", where the length is stored in an arithmetic-coded
|
||||
// bit tree, and the distance is the most recently used distance (Rep0).
|
||||
//
|
||||
// Because both of these possibilities just use Rep0, LzDecodeRep0 handles
|
||||
// these two cases. Otherwise, we use LzDecodeLongRep to read up to two
|
||||
// additional bits to figure out which recently used distance (1, 2, or 3)
|
||||
// to use.
|
||||
//
|
||||
if (RcIsBitSet(&Decoder.u.BitModel.Rep0[Decoder.Sequence]))
|
||||
{
|
||||
LzDecodeLongRep(PosBit);
|
||||
}
|
||||
else
|
||||
{
|
||||
LzDecodeRep0(PosBit);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
LzDecode (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint32_t position;
|
||||
uint8_t posBit;
|
||||
|
||||
//
|
||||
// Get the current position in dictionary, making sure we have input bytes.
|
||||
// Once we run out of bytes, normalize the last arithmetic coded byte and
|
||||
// ensure there's no pending lengths that we haven't yet repeated.
|
||||
//
|
||||
while (DtCanWrite(&position) && RcCanRead())
|
||||
{
|
||||
//
|
||||
// An LZMA packet begins here, which can have 3 possible initial bit
|
||||
// sequences that correspond to the type of encoding that was chosen
|
||||
// to represent the next stream of symbols.
|
||||
//
|
||||
// {0, n} represents a "literal", which LzDecodeLiteral decodes.
|
||||
// Literals are a single byte encoded with arithmetic-coded bit trees
|
||||
//
|
||||
// {1, 0} represents a "match", which LzDecodeMatch decodes.
|
||||
// Matches are typical LZ77 sequences with explicit length and distance
|
||||
//
|
||||
// {1, 1} represents a "rep", which LzDecodeRep decodes.
|
||||
// Reps are LZ77 sequences where the distance is encoded as a reference
|
||||
// to a previously used distance (up to 4 -- called "Rep0-3").
|
||||
//
|
||||
// Once we've decoded either the "match" or the "rep', we now have the
|
||||
// distance in "Rep0" (the most recently used distance) and the length
|
||||
// in "Len", so we will use DtRepeatSymbol to go back in the dictionary
|
||||
// buffer "Rep0" bytes and repeat that character "Len" times.
|
||||
//
|
||||
posBit = position & (LZMA_POSITION_COUNT - 1);
|
||||
if (RcIsBitSet(&Decoder.u.BitModel.Match[Decoder.Sequence][posBit]))
|
||||
{
|
||||
if (RcIsBitSet(&Decoder.u.BitModel.Rep[Decoder.Sequence]))
|
||||
{
|
||||
LzDecodeRep(posBit);
|
||||
}
|
||||
else
|
||||
{
|
||||
LzDecodeMatch(posBit);
|
||||
}
|
||||
|
||||
if (!DtRepeatSymbol(Decoder.Len, Decoder.Rep0 + 1))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
Decoder.Len = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
LzDecodeLiteral();
|
||||
}
|
||||
}
|
||||
RcNormalize();
|
||||
return (Decoder.Len == 0);
|
||||
}
|
||||
|
||||
void
|
||||
LzResetState (
|
||||
void
|
||||
)
|
||||
{
|
||||
//
|
||||
// Initialize decoder to default state in case we're called more than once.
|
||||
// The LZMA "Bit Model" is an adaptive arithmetic-coded probability-based
|
||||
// bit tree which encodes either a "0" or a "1".
|
||||
//
|
||||
Decoder.Sequence = LzmaLitLitLitState;
|
||||
Decoder.Rep0 = Decoder.Rep1 = Decoder.Rep2 = Decoder.Rep3 = 0;
|
||||
static_assert((LZMA_BIT_MODEL_SLOTS * 2) == sizeof(Decoder.u.BitModel),
|
||||
"Invalid size");
|
||||
for (int i = 0; i < LZMA_BIT_MODEL_SLOTS; i++)
|
||||
{
|
||||
RcSetDefaultProbability(&Decoder.u.RawProbabilities[i]);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
LzInitialize (
|
||||
uint8_t Properties
|
||||
)
|
||||
{
|
||||
if (Properties != k_LzSupportedProperties)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
LzResetState();
|
||||
return true;
|
||||
}
|
114
src/minilzlib/lzmadec.h
Normal file
114
src/minilzlib/lzmadec.h
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
lzmadec.h
|
||||
|
||||
Abstract:
|
||||
|
||||
This header file contains C-style definitions, constants, and enumerations
|
||||
that map back to the LZMA Standard, specifically the probability model that
|
||||
is used for encoding probabilities.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// Literals can be 0-255 and are encoded in 3 different types of slots based on
|
||||
// the previous literal decoded and the "match byte" used.
|
||||
//
|
||||
#define LZMA_LITERALS 256
|
||||
#define LZMA_LC_TYPES 3
|
||||
#define LZMA_LC_MODEL_SIZE (LZMA_LC_TYPES * LZMA_LITERALS)
|
||||
|
||||
//
|
||||
// These are the hardcoded LZMA properties we support for position and coders
|
||||
//
|
||||
#define LZMA_LC 3
|
||||
#define LZMA_PB 2
|
||||
#define LZMA_LP 0
|
||||
#define LZMA_LITERAL_CODERS (1 << LZMA_LC)
|
||||
#define LZMA_POSITION_COUNT (1 << LZMA_PB)
|
||||
|
||||
//
|
||||
// Lengths are described in three different ways using "low", "mid", and "high"
|
||||
// bit trees. The first two trees encode 3 bits, the last encodes 8. We never
|
||||
// encode a length less than 2 bytes, since that's wasteful.
|
||||
//
|
||||
#define LZMA_MAX_LOW_LENGTH (1 << 3)
|
||||
#define LZMA_MAX_MID_LENGTH (1 << 3)
|
||||
#define LZMA_MAX_HIGH_LENGTH (1 << 8)
|
||||
#define LZMA_MIN_LENGTH 2
|
||||
|
||||
//
|
||||
// Distances can be encoded in different ways, based on the distance slot.
|
||||
// Lengths of 2, 3, 4 bytes are directly encoded with their own slot. Lengths
|
||||
// over 5 share a slot, which is then further subdivded into 3 different ways
|
||||
// of encoding them, which are described in the source.
|
||||
//
|
||||
#define LZMA_DISTANCE_SLOTS 64
|
||||
#define LZMA_FIRST_CONTEXT_DISTANCE_SLOT 4
|
||||
#define LZMA_FIRST_FIXED_DISTANCE_SLOT 14
|
||||
#define LZMA_DISTANCE_ALIGN_BITS 4
|
||||
#define LZMA_DISTANCE_ALIGN_SLOTS (1 << LZMA_DISTANCE_ALIGN_BITS)
|
||||
|
||||
//
|
||||
// Total number of probabilities that we need to store
|
||||
//
|
||||
#define LZMA_BIT_MODEL_SLOTS (1174 + \
|
||||
(LZMA_LITERAL_CODERS * \
|
||||
LZMA_LC_MODEL_SIZE))
|
||||
|
||||
//
|
||||
// The LZMA probability bit model is typically based on the last LZMA sequences
|
||||
// that were decoded. There are 11 such possibilities that are tracked.
|
||||
//
|
||||
typedef enum _LZMA_SEQUENCE_STATE
|
||||
{
|
||||
//
|
||||
// State where we last saw three literals
|
||||
//
|
||||
LzmaLitLitLitState,
|
||||
//
|
||||
// States where we last saw two literals preceeded by a non-literal
|
||||
//
|
||||
LzmaMatchLitLitState,
|
||||
LzmaRepLitLitState,
|
||||
LzmaLitShortrepLitLitState,
|
||||
//
|
||||
// States where we last saw one literal preceeded by a non-literal
|
||||
//
|
||||
LzmaMatchLitState,
|
||||
LzmaRepLitState,
|
||||
LzmaLitShortrepLitState,
|
||||
//
|
||||
// Separator between states where we last saw at least one literal
|
||||
//
|
||||
LzmaMaxLitState,
|
||||
//
|
||||
// States where we last saw a non-literal preceeded by a literal
|
||||
//
|
||||
LzmaLitMatchState = 7,
|
||||
LzmaLitRepState,
|
||||
LzmaLitShortrepState,
|
||||
//
|
||||
// States where we last saw two non-literals
|
||||
//
|
||||
LzmaNonlitMatchState,
|
||||
LzmaNonlitRepState,
|
||||
//
|
||||
// Separator for number of total states
|
||||
//
|
||||
LzmaMaxState
|
||||
} LZMA_SEQUENCE_STATE, * PLZMA_SEQUENCE_STATE;
|
87
src/minilzlib/minlzlib.h
Normal file
87
src/minilzlib/minlzlib.h
Normal file
|
@ -0,0 +1,87 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
minlzlib.h
|
||||
|
||||
Abstract:
|
||||
|
||||
This header file is the main include for the minlz library. It contains the
|
||||
internal function definitions for the history \& input buffers, the LZMA and
|
||||
LZMA2 decoders, and the arithmetic (de)coder.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// C Standard Headers
|
||||
//
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
|
||||
//
|
||||
// Input Buffer Management
|
||||
//
|
||||
bool BfRead(uint8_t* Byte);
|
||||
bool BfSeek(uint32_t Length, uint8_t** Bytes);
|
||||
bool BfAlign(void);
|
||||
void BfInitialize(uint8_t* InputBuffer, uint32_t InputSize);
|
||||
bool BfSetSoftLimit(uint32_t Remaining);
|
||||
void BfResetSoftLimit(void);
|
||||
|
||||
//
|
||||
// Dictionary (History Buffer) Management
|
||||
//
|
||||
bool DtRepeatSymbol(uint32_t Length, uint32_t Distance);
|
||||
void DtInitialize(uint8_t* HistoryBuffer, uint32_t Position);
|
||||
bool DtSetLimit(uint32_t Limit);
|
||||
void DtPutSymbol(uint8_t Symbol);
|
||||
uint8_t DtGetSymbol(uint32_t Distance);
|
||||
bool DtCanWrite(uint32_t* Position);
|
||||
bool DtIsComplete(uint32_t* BytesProcessed);
|
||||
|
||||
//
|
||||
// Range Decoder
|
||||
//
|
||||
uint8_t RcGetBitTree(uint16_t* BitModel, uint16_t Limit);
|
||||
uint8_t RcGetReverseBitTree(uint16_t* BitModel, uint8_t HighestBit);
|
||||
uint8_t RcDecodeMatchedBitTree(uint16_t* BitModel, uint8_t MatchByte);
|
||||
uint32_t RcGetFixed(uint8_t HighestBit);
|
||||
bool RcInitialize(uint16_t* ChunkSize);
|
||||
uint8_t RcIsBitSet(uint16_t* Probability);
|
||||
void RcNormalize(void);
|
||||
bool RcCanRead(void);
|
||||
bool RcIsComplete(uint32_t* Offset);
|
||||
void RcSetDefaultProbability(uint16_t* Probability);
|
||||
|
||||
//
|
||||
// LZMA Decoder
|
||||
//
|
||||
bool LzDecode(void);
|
||||
bool LzInitialize(uint8_t Properties);
|
||||
void LzResetState(void);
|
||||
|
||||
//
|
||||
// LZMA2 Decoder
|
||||
//
|
||||
bool Lz2DecodeStream(uint32_t* BytesProcessed, bool GetSizeOnly);
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
//
|
||||
// Checksum Management
|
||||
//
|
||||
uint32_t OsComputeCrc32(uint32_t Initial, const uint8_t* Data, uint32_t Length);
|
||||
#define Crc32(Buffer, Length) OsComputeCrc32(0, (const uint8_t*)Buffer, Length)
|
||||
#endif
|
395
src/minilzlib/rangedec.c
Normal file
395
src/minilzlib/rangedec.c
Normal file
|
@ -0,0 +1,395 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
rangedec.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements the Range Decoder, which is how LZMA describes the
|
||||
arithmetic coder that it uses to represent the binary representation of the
|
||||
LZ77 match length-distance pairs after the initial compression pass. At the
|
||||
implementation level, this coder works with an alphabet of only 2 symbols:
|
||||
the bit "0", and the bit "1", so there are only ever two probability ranges
|
||||
that need to be checked each pass. In LZMA, a probability of 100% encodes a
|
||||
"0", while 0% encodes a "1". Initially, all probabilities are assumed to be
|
||||
50%. Probabilities are stored using 11-bits (2048 \=\= 100%), and thus use 16
|
||||
bits of storage. Finally, the range decoder is adaptive, meaning that each
|
||||
time a bit is decoded, the probabilities are updated: each 0 increases the
|
||||
probability of another 0, and each 1 decrases it. The algorithm adapts the
|
||||
probabilities using an exponential moving average with a shift ratio of 5.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
|
||||
//
|
||||
// The range decoder uses 11 probability bits, where 2048 is 100% chance of a 0
|
||||
//
|
||||
#define LZMA_RC_PROBABILITY_BITS 11
|
||||
#define LZMA_RC_MAX_PROBABILITY (1 << LZMA_RC_PROBABILITY_BITS)
|
||||
const uint16_t k_LzmaRcHalfProbability = LZMA_RC_MAX_PROBABILITY / 2;
|
||||
|
||||
//
|
||||
// The range decoder uses an exponential moving average of the last probability
|
||||
// hit (match or miss) with an adaptation rate of 5 bits (which falls in the
|
||||
// middle of its 11 bits used to encode a probability.
|
||||
//
|
||||
#define LZMA_RC_ADAPTATION_RATE_SHIFT 5
|
||||
|
||||
//
|
||||
// The range decoder has enough precision for the range only as long as the top
|
||||
// 8 bits are still set. Once it falls below, it needs a renormalization step.
|
||||
//
|
||||
#define LZMA_RC_MIN_RANGE (1 << 24)
|
||||
|
||||
//
|
||||
// The range decoder must be initialized with 5 bytes, the first of which is
|
||||
// ignored
|
||||
//
|
||||
#define LZMA_RC_INIT_BYTES 5
|
||||
|
||||
//
|
||||
// State used for the binary adaptive arithmetic coder (LZMA Range Decoder)
|
||||
//
|
||||
typedef struct _RANGE_DECODER_STATE
|
||||
{
|
||||
//
|
||||
// Start and end location of the current stream's range encoder buffer
|
||||
//
|
||||
uint8_t* Start;
|
||||
uint8_t* Limit;
|
||||
//
|
||||
// Current probability range and 32-bit arithmetic encoded sequence code
|
||||
//
|
||||
uint32_t Range;
|
||||
uint32_t Code;
|
||||
} RANGE_DECODER_STATE, *PRANGE_DECODER_STATE;
|
||||
RANGE_DECODER_STATE RcState;
|
||||
|
||||
bool
|
||||
RcInitialize (
|
||||
uint16_t* ChunkSize
|
||||
)
|
||||
{
|
||||
uint8_t i, rcByte;
|
||||
uint8_t* chunkEnd;
|
||||
|
||||
//
|
||||
// Make sure that the input buffer has enough space for the requirements of
|
||||
// the range encoder. We (temporarily) seek forward to validate this.
|
||||
//
|
||||
if (!BfSeek(*ChunkSize, &chunkEnd))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
BfSeek(-*ChunkSize, &chunkEnd);
|
||||
|
||||
//
|
||||
// The initial probability range is set to its highest value, after which
|
||||
// the next 5 bytes are used to initialize the initial code. Note that the
|
||||
// first byte outputted by the encoder is always going to be zero, so it is
|
||||
// ignored here.
|
||||
//
|
||||
RcState.Range = (uint32_t)-1;
|
||||
RcState.Code = 0;
|
||||
for (i = 0; i < LZMA_RC_INIT_BYTES; i++)
|
||||
{
|
||||
BfRead(&rcByte);
|
||||
RcState.Code = (RcState.Code << 8) | rcByte;
|
||||
}
|
||||
|
||||
//
|
||||
// Store our current location in the buffer now, and how far we can go on
|
||||
// reading. Then decrease the total chunk size by the count of init bytes,
|
||||
// so that the caller can check, once done (RcIsComplete), if the code has
|
||||
// become 0 exactly when the compressed chunk size has been fully consumed
|
||||
// by the decoder.
|
||||
//
|
||||
BfSeek(0, &RcState.Start);
|
||||
RcState.Limit = RcState.Start + *ChunkSize;
|
||||
*ChunkSize -= LZMA_RC_INIT_BYTES;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
RcCanRead (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t* pos;
|
||||
//
|
||||
// We can keep reading symbols as long as we haven't reached the end of the
|
||||
// input buffer yet.
|
||||
//
|
||||
BfSeek(0, &pos);
|
||||
return pos <= RcState.Limit;
|
||||
}
|
||||
|
||||
bool
|
||||
RcIsComplete (
|
||||
uint32_t* BytesProcessed
|
||||
)
|
||||
{
|
||||
uint8_t* pos;
|
||||
//
|
||||
// When the last symbol has been decoded, the last code should be zero as
|
||||
// there is nothing left to describe. Return the offset in the buffer where
|
||||
// this occurred (which should be equal to the compressed size).
|
||||
//
|
||||
BfSeek(0, &pos);
|
||||
*BytesProcessed = (uint32_t)(pos - RcState.Start);
|
||||
return (RcState.Code == 0);
|
||||
}
|
||||
|
||||
void
|
||||
RcNormalize (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t rcByte;
|
||||
//
|
||||
// Whenever we drop below 24 bits, there is no longer enough precision in
|
||||
// the probability range not to avoid a "stuck" state where we cannot tell
|
||||
// apart the two branches (above/below the probability range) because the
|
||||
// two options appear identical with the number of precision bits that we
|
||||
// have. In this case, shift the state by a byte (8 bits) and read another.
|
||||
//
|
||||
if (RcState.Range < LZMA_RC_MIN_RANGE)
|
||||
{
|
||||
RcState.Range <<= 8;
|
||||
RcState.Code <<= 8;
|
||||
BfRead(&rcByte);
|
||||
RcState.Code |= rcByte;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
RcAdapt (
|
||||
bool Miss,
|
||||
uint16_t* Probability
|
||||
)
|
||||
{
|
||||
//
|
||||
// In the canonical range encoders out there (including this one used by
|
||||
// LZMA, we want the probability to adapt (change) as we read more or less
|
||||
// bits that match our expectation. In order to quickly adapt to change,
|
||||
// use an exponential moving average. The standard way of doing this is to
|
||||
// use an integer based adaptation with a shift that's somewhere between
|
||||
// {1, bits-1}. Since LZMA uses 11 bits for its model, 5 is a nice number
|
||||
// that lands exactly between 1 and 10.
|
||||
//
|
||||
if (Miss)
|
||||
{
|
||||
*Probability -= *Probability >> LZMA_RC_ADAPTATION_RATE_SHIFT;
|
||||
}
|
||||
else
|
||||
{
|
||||
*Probability += (LZMA_RC_MAX_PROBABILITY - *Probability) >>
|
||||
LZMA_RC_ADAPTATION_RATE_SHIFT;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t
|
||||
RcIsBitSet (
|
||||
uint16_t* Probability
|
||||
)
|
||||
{
|
||||
uint32_t bound;
|
||||
uint8_t bit;
|
||||
|
||||
//
|
||||
// Always begin by making sure the range has been normalized for precision
|
||||
//
|
||||
RcNormalize();
|
||||
|
||||
//
|
||||
// Check if the current arithmetic code is descried by the next calculated
|
||||
// proportionally-divided probability range. Recall that the probabilities
|
||||
// encode the chance of the symbol (bit) being a 0 -- not a 1!
|
||||
//
|
||||
// Therefore, if the next chunk of the code lies outside of this new range,
|
||||
// we are still on the path to our 0. Otherwise, if the code is now part of
|
||||
// the newly defined range (inclusive), then we produce a 1 and limit the
|
||||
// range to produce a new range and code for the next decoding pass.
|
||||
//
|
||||
bound = (RcState.Range >> LZMA_RC_PROBABILITY_BITS) * *Probability;
|
||||
if (RcState.Code < bound)
|
||||
{
|
||||
RcState.Range = bound;
|
||||
bit = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
RcState.Range -= bound;
|
||||
RcState.Code -= bound;
|
||||
bit = 1;
|
||||
}
|
||||
|
||||
//
|
||||
// Always finish by adapt the probabilities based on the bit value
|
||||
//
|
||||
RcAdapt(bit, Probability);
|
||||
return bit;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
RcIsFixedBitSet(
|
||||
void
|
||||
)
|
||||
{
|
||||
uint8_t bit;
|
||||
|
||||
//
|
||||
// This is a specialized version of RcIsBitSet with two differences:
|
||||
//
|
||||
// First, there is no adaptive probability -- it is hardcoded to 50%.
|
||||
//
|
||||
// Second, because there are 11 bits per probability, and 50% is 1<<10,
|
||||
// "(LZMA_RC_PROBABILITY_BITS) * Probability" is essentially 1. As such,
|
||||
// we can just shift by 1 (in other words, halving the range).
|
||||
//
|
||||
RcNormalize();
|
||||
RcState.Range >>= 1;
|
||||
if (RcState.Code < RcState.Range)
|
||||
{
|
||||
bit = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
RcState.Code -= RcState.Range;
|
||||
bit = 1;
|
||||
}
|
||||
return bit;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
RcGetBitTree (
|
||||
uint16_t* BitModel,
|
||||
uint16_t Limit
|
||||
)
|
||||
{
|
||||
uint16_t symbol;
|
||||
|
||||
//
|
||||
// Context probability bit trees always begin at index 1. Iterate over each
|
||||
// decoded bit and just keep shifting it in place, until we reach the total
|
||||
// expected number of bits, which should never be over 8 (limit is 0x100).
|
||||
//
|
||||
// Once decoded, always subtract the limit back from the symbol since we go
|
||||
// one bit "past" the limit in the loop, as a side effect of the tree being
|
||||
// off-by-one.
|
||||
//
|
||||
for (symbol = 1; symbol < Limit; )
|
||||
{
|
||||
symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
|
||||
}
|
||||
return (symbol - Limit) & 0xFF;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
RcGetReverseBitTree (
|
||||
uint16_t* BitModel,
|
||||
uint8_t HighestBit
|
||||
)
|
||||
{
|
||||
uint16_t symbol;
|
||||
uint8_t i, bit, result;
|
||||
|
||||
//
|
||||
// This is the same logic as in RcGetBitTree, but with the bits actually
|
||||
// encoded in reverse order. We keep track of the probability index as the
|
||||
// "symbol" just like RcGetBitTree, but actually decode the result in the
|
||||
// opposite order.
|
||||
//
|
||||
for (i = 0, symbol = 1, result = 0; i < HighestBit; i++)
|
||||
{
|
||||
bit = RcIsBitSet(&BitModel[symbol]);
|
||||
symbol = (symbol << 1) | bit;
|
||||
result |= bit << i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
uint8_t
|
||||
RcDecodeMatchedBitTree (
|
||||
uint16_t* BitModel,
|
||||
uint8_t MatchByte
|
||||
)
|
||||
{
|
||||
uint16_t symbol, bytePos, matchBit;
|
||||
uint8_t bit;
|
||||
|
||||
//
|
||||
// Parse each bit in the "match byte" (see LzDecodeLiteral), which we call
|
||||
// a "match bit".
|
||||
//
|
||||
// Then, treat this as a special bit tree decoding where two possible trees
|
||||
// are used: one for when the "match bit" is set, and a separate one for
|
||||
// when the "match bit" is not set. Since each tree can encode up to 256
|
||||
// symbols, each one has 0x100 slots.
|
||||
//
|
||||
// Finally, we have the original bit tree which we'll revert back to once
|
||||
// the match bits are no longer in play, which we parse for the remainder
|
||||
// of the symbol.
|
||||
//
|
||||
for (bytePos = MatchByte, symbol = 1; symbol < 0x100; bytePos <<= 1)
|
||||
{
|
||||
matchBit = (bytePos >> 7) & 1;
|
||||
|
||||
bit = RcIsBitSet(&BitModel[symbol + (0x100 * (matchBit + 1))]);
|
||||
symbol = (symbol << 1) | bit;
|
||||
|
||||
if (matchBit != bit)
|
||||
{
|
||||
while (symbol < 0x100)
|
||||
{
|
||||
symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return symbol & 0xFF;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
RcGetFixed (
|
||||
uint8_t HighestBit
|
||||
)
|
||||
{
|
||||
uint32_t symbol;
|
||||
|
||||
//
|
||||
// Fixed probability bit trees always begin at index 0. Iterate over each
|
||||
// decoded bit and just keep shifting it in place, until we reach the total
|
||||
// expected number of bits (typically never higher than 26 -- the maximum
|
||||
// number of "direct bits" that the distance of a "match" can encode).
|
||||
//
|
||||
symbol = 0;
|
||||
do
|
||||
{
|
||||
symbol = (symbol << 1) | RcIsFixedBitSet();
|
||||
} while (--HighestBit > 0);
|
||||
return symbol;
|
||||
}
|
||||
|
||||
void
|
||||
RcSetDefaultProbability (
|
||||
uint16_t* Probability
|
||||
)
|
||||
{
|
||||
//
|
||||
// By default, we initialize the probabilities to 0.5 (50% chance).
|
||||
//
|
||||
*Probability = k_LzmaRcHalfProbability;
|
||||
}
|
535
src/minilzlib/xzstream.c
Normal file
535
src/minilzlib/xzstream.c
Normal file
|
@ -0,0 +1,535 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
xzstream.c
|
||||
|
||||
Abstract:
|
||||
|
||||
This module implements the XZ stream format decoding, including support for
|
||||
parsing the stream header and block header, and then handing off the block
|
||||
decoding to the LZMA2 decoder. Finally, if "meta checking" is enabled, then
|
||||
the index and stream footer are also parsed and validated. Optionally, each
|
||||
of these component structures can be checked against its CRC32 checksum, if
|
||||
"integrity checking" has been enabled. Note that this library only supports
|
||||
single-stream, single-block XZ files that have CRC32 (or None) set as their
|
||||
block checking algorithm. Finally, no BJC filters are supported, and files
|
||||
with a compressed/uncompressed size metadata indicator are not handled.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#include "minlzlib.h"
|
||||
#include "xzstream.h"
|
||||
#include "../utils.h"
|
||||
|
||||
//
|
||||
// XzDecodeBlockHeader can return "I successfully found a block",
|
||||
// "I failed/bad block header", or "there was no block header".
|
||||
// Though minlzlib explicitly only claims to handle files with a
|
||||
// single block, it needs to also handle files with no blocks at all.
|
||||
// (Produced by "xz" when compressing an empty input file)
|
||||
//
|
||||
typedef enum _XZ_DECODE_BLOCK_HEADER_RESULT {
|
||||
XzBlockHeaderFail = 0,
|
||||
XzBlockHeaderSuccess = 1,
|
||||
XzBlockHeaderNoBlock = 2
|
||||
} XZ_DECODE_BLOCK_HEADER_RESULT;
|
||||
|
||||
const uint8_t k_XzLzma2FilterIdentifier = 0x21;
|
||||
|
||||
#ifdef _WIN32
|
||||
void __security_check_cookie(_In_ uintptr_t _StackCookie) { (void)(_StackCookie); }
|
||||
#endif
|
||||
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
//
|
||||
// XZ Stream Container State
|
||||
//
|
||||
typedef struct _CONTAINER_STATE
|
||||
{
|
||||
//
|
||||
// Size of the XZ header and the index, used to validate against footer
|
||||
//
|
||||
uint32_t HeaderSize;
|
||||
uint32_t IndexSize;
|
||||
//
|
||||
// Size of the compressed block and its checksum
|
||||
//
|
||||
uint32_t UncompressedBlockSize;
|
||||
uint32_t UnpaddedBlockSize;
|
||||
uint32_t ChecksumSize;
|
||||
} CONTAINER_STATE, * PCONTAINER_STATE;
|
||||
CONTAINER_STATE Container;
|
||||
#endif
|
||||
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
bool
|
||||
XzDecodeVli (
|
||||
vli_type* Vli
|
||||
)
|
||||
{
|
||||
uint8_t vliByte;
|
||||
uint32_t bitPos;
|
||||
|
||||
//
|
||||
// Read the initial VLI byte (might be the value itself)
|
||||
//
|
||||
if (!BfRead(&vliByte))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
*Vli = vliByte & 0x7F;
|
||||
|
||||
//
|
||||
// Check if this was a complex VLI (and we have space for it)
|
||||
//
|
||||
bitPos = 7;
|
||||
while ((vliByte & 0x80) != 0)
|
||||
{
|
||||
//
|
||||
// Read the next byte
|
||||
//
|
||||
if (!BfRead(&vliByte))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Make sure we're not decoding an invalid VLI
|
||||
//
|
||||
if ((bitPos == (7 * VLI_BYTES_MAX)) || (vliByte == 0))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Decode it and move to the next 7 bits
|
||||
//
|
||||
*Vli |= (vli_type)((vliByte & 0x7F) << bitPos);
|
||||
bitPos += 7;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
XzDecodeIndex (
|
||||
void
|
||||
)
|
||||
{
|
||||
uint32_t vli;
|
||||
uint8_t* indexStart;
|
||||
uint8_t* indexEnd;
|
||||
uint32_t* pCrc32;
|
||||
uint8_t indexByte;
|
||||
|
||||
//
|
||||
// Remember where the index started so we can compute its size
|
||||
//
|
||||
BfSeek(0, &indexStart);
|
||||
|
||||
//
|
||||
// The index always starts out with an empty byte
|
||||
//
|
||||
if (!BfRead(&indexByte) || (indexByte != 0))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Then the count of blocks, which we expect to be 1
|
||||
//
|
||||
if (!XzDecodeVli(&vli) || (vli != 1))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Then the unpadded block size, which should match
|
||||
//
|
||||
if (!XzDecodeVli(&vli) || (Container.UnpaddedBlockSize != vli))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Then the uncompressed block size, which should match
|
||||
//
|
||||
if (!XzDecodeVli(&vli) || (Container.UncompressedBlockSize != vli))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Then we pad to the next multiple of 4
|
||||
//
|
||||
if (!BfAlign())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Store the index size with padding to validate the footer later
|
||||
//
|
||||
BfSeek(0, &indexEnd);
|
||||
Container.IndexSize = (uint32_t)(indexEnd - indexStart);
|
||||
|
||||
//
|
||||
// Read the CRC32, which is not part of the index size
|
||||
//
|
||||
if (!BfSeek(sizeof(*pCrc32), (uint8_t**)&pCrc32))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
//
|
||||
// Make sure the index is not corrupt
|
||||
//
|
||||
if (Crc32(indexStart, Container.IndexSize) != *pCrc32)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
XzDecodeStreamFooter (
|
||||
void
|
||||
)
|
||||
{
|
||||
PXZ_STREAM_FOOTER streamFooter;
|
||||
|
||||
//
|
||||
// Seek past the footer, making sure we have space in the input stream
|
||||
//
|
||||
if (!BfSeek(sizeof(*streamFooter), (uint8_t**)&streamFooter))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate the footer magic
|
||||
//
|
||||
if (streamFooter->Magic != 'ZY')
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate no flags other than checksum type are set
|
||||
//
|
||||
if ((streamFooter->u.Flags != 0) &&
|
||||
((streamFooter->u.s.CheckType != XzCheckTypeCrc32) &&
|
||||
(streamFooter->u.s.CheckType != XzCheckTypeNone)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate if the footer accurately describes the size of the index
|
||||
//
|
||||
if (Container.IndexSize != (streamFooter->BackwardSize * 4))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
//
|
||||
// Compute the footer's CRC32 and make sure it's not corrupted
|
||||
//
|
||||
if (Crc32(&streamFooter->BackwardSize,
|
||||
sizeof(streamFooter->BackwardSize) +
|
||||
sizeof(streamFooter->u.Flags)) !=
|
||||
streamFooter->Crc32)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool
|
||||
XzDecodeBlock (
|
||||
uint8_t* OutputBuffer,
|
||||
uint32_t* BlockSize
|
||||
)
|
||||
{
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
uint8_t *inputStart, *inputEnd;
|
||||
#endif
|
||||
//
|
||||
// Decode the LZMA2 stream. If full integrity checking is enabled, also
|
||||
// save the offset before and after decoding, so we can save the block
|
||||
// sizes and compare them against the footer and index after decoding.
|
||||
//
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
BfSeek(0, &inputStart);
|
||||
#endif
|
||||
if (!Lz2DecodeStream(BlockSize, OutputBuffer == NULL))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
BfSeek(0, &inputEnd);
|
||||
Container.UnpaddedBlockSize = Container.HeaderSize +
|
||||
(uint32_t)(inputEnd - inputStart);
|
||||
Container.UncompressedBlockSize = *BlockSize;
|
||||
#endif
|
||||
//
|
||||
// After the block data, we need to pad to 32-bit alignment
|
||||
//
|
||||
if (!BfAlign())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#if defined(MINLZ_INTEGRITY_CHECKS) || defined(MINLZ_META_CHECKS)
|
||||
//
|
||||
// Finally, move past the size of the checksum if any, then compare it with
|
||||
// with the actual CRC32 of the block, if integrity checks are enabled. If
|
||||
// meta checks are enabled, update the block size so the index checking can
|
||||
// validate it.
|
||||
//
|
||||
if (!BfSeek(Container.ChecksumSize, &inputEnd))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
(void)(OutputBuffer);
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
if ((OutputBuffer != NULL) &&
|
||||
(Crc32(OutputBuffer, *BlockSize) != *(uint32_t*)inputEnd))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
Container.UnpaddedBlockSize += Container.ChecksumSize;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
XzDecodeStreamHeader (
|
||||
void
|
||||
)
|
||||
{
|
||||
PXZ_STREAM_HEADER streamHeader;
|
||||
|
||||
//
|
||||
// Seek past the header, making sure we have space in the input stream
|
||||
//
|
||||
if (!BfSeek(sizeof(*streamHeader), (uint8_t**)&streamHeader))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
//
|
||||
// Validate the header magic
|
||||
//
|
||||
if ((*(uint32_t*)&streamHeader->Magic[1] != 'ZXz7') ||
|
||||
(streamHeader->Magic[0] != 0xFD) ||
|
||||
(streamHeader->Magic[5] != 0x00))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate no flags other than checksum type are set
|
||||
//
|
||||
if ((streamHeader->u.Flags != 0) &&
|
||||
((streamHeader->u.s.CheckType != XzCheckTypeCrc32) &&
|
||||
(streamHeader->u.s.CheckType != XzCheckTypeNone)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Remember that a checksum might come at the end of the block later
|
||||
//
|
||||
Container.ChecksumSize = streamHeader->u.s.CheckType * 4;
|
||||
#endif
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
//
|
||||
// Compute the header's CRC32 and make sure it's not corrupted
|
||||
//
|
||||
if (Crc32(&streamHeader->u.Flags, sizeof(streamHeader->u.Flags)) !=
|
||||
streamHeader->Crc32)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
XZ_DECODE_BLOCK_HEADER_RESULT
|
||||
XzDecodeBlockHeader (
|
||||
void
|
||||
)
|
||||
{
|
||||
PXZ_BLOCK_HEADER blockHeader;
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
uint32_t size;
|
||||
#endif
|
||||
//
|
||||
// Seek past the header, making sure we have space in the input stream
|
||||
//
|
||||
if (!BfSeek(sizeof(*blockHeader), (uint8_t**)&blockHeader))
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
if (blockHeader->Size == 0)
|
||||
{
|
||||
//
|
||||
// That's no block! That's an index!
|
||||
//
|
||||
BfSeek((uint32_t)(-(uint16_t)sizeof(*blockHeader)),
|
||||
(uint8_t**)&blockHeader);
|
||||
return XzBlockHeaderNoBlock;
|
||||
}
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
//
|
||||
// Validate that the size of the header is what we expect
|
||||
//
|
||||
Container.HeaderSize = (blockHeader->Size + 1) * 4;
|
||||
if (Container.HeaderSize != sizeof(*blockHeader))
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate that no additional flags or filters are enabled
|
||||
//
|
||||
if (blockHeader->u.Flags != 0)
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
|
||||
//
|
||||
// Validate that the only filter is the LZMA2 filter
|
||||
//
|
||||
if (blockHeader->LzmaFlags.Id != k_XzLzma2FilterIdentifier)
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
|
||||
//
|
||||
// With the expected number of property bytes
|
||||
//
|
||||
if (blockHeader->LzmaFlags.Size
|
||||
!= sizeof(blockHeader->LzmaFlags.u.Properties))
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
|
||||
//
|
||||
// The only property is the dictionary size, make sure it is valid.
|
||||
//
|
||||
// We don't actually need to store or compare the size with anything since
|
||||
// the library expects the caller to always put in a buffer that's large
|
||||
// enough to contain the full uncompressed file (or calling it in "get size
|
||||
// only" mode to get this information).
|
||||
//
|
||||
// This output buffer can thus be smaller than the size of the dictionary
|
||||
// which is absolutely OK as long as that's actually the size of the output
|
||||
// file. If callers pass in a buffer size that's too small, decoding will
|
||||
// fail at later stages anyway, and that's incorrect use of minlzlib.
|
||||
//
|
||||
size = blockHeader->LzmaFlags.u.s.DictionarySize;
|
||||
if (size > 39)
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
#ifdef MINLZ_INTEGRITY_CHECKS
|
||||
//
|
||||
// Compute the header's CRC32 and make sure it's not corrupted
|
||||
//
|
||||
if (Crc32(blockHeader,
|
||||
Container.HeaderSize - sizeof(blockHeader->Crc32)) !=
|
||||
blockHeader->Crc32)
|
||||
{
|
||||
return XzBlockHeaderFail;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return XzBlockHeaderSuccess;
|
||||
}
|
||||
|
||||
bool
|
||||
XzDecode (
|
||||
uint8_t* InputBuffer,
|
||||
uint32_t InputSize,
|
||||
uint8_t* OutputBuffer,
|
||||
uint32_t* OutputSize
|
||||
)
|
||||
{
|
||||
|
||||
printf("Output size: %p %ld\n", OutputSize, *OutputSize);
|
||||
//
|
||||
// Initialize the input buffer descriptor and history buffer (dictionary)
|
||||
//
|
||||
BfInitialize(InputBuffer, InputSize);
|
||||
DtInitialize(OutputBuffer, *OutputSize);
|
||||
|
||||
//
|
||||
// Decode the stream header for check for validity
|
||||
//
|
||||
if (!XzDecodeStreamHeader())
|
||||
{
|
||||
printf("header decode failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Decode the block header for check for validity
|
||||
//
|
||||
switch (XzDecodeBlockHeader())
|
||||
{
|
||||
case XzBlockHeaderFail:
|
||||
printf("block header failed\n");
|
||||
return false;
|
||||
case XzBlockHeaderNoBlock:
|
||||
printf("no block\n");
|
||||
*OutputSize = 0;
|
||||
break;
|
||||
case XzBlockHeaderSuccess:
|
||||
printf("block ok\n");
|
||||
//
|
||||
// Decode the actual block
|
||||
//
|
||||
if (!XzDecodeBlock(OutputBuffer, OutputSize))
|
||||
{
|
||||
printf("block decode failed\n");
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef MINLZ_META_CHECKS
|
||||
//
|
||||
// Decode the index for validity checks
|
||||
//
|
||||
if (!XzDecodeIndex())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// And finally decode the footer as a final set of checks
|
||||
//
|
||||
if (!XzDecodeStreamFooter())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
124
src/minilzlib/xzstream.h
Normal file
124
src/minilzlib/xzstream.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
/*++
|
||||
|
||||
Copyright (c) Alex Ionescu. All rights reserved.
|
||||
|
||||
Module Name:
|
||||
|
||||
xzstream.h
|
||||
|
||||
Abstract:
|
||||
|
||||
This header file contains C-style data structures and enumerations that map
|
||||
back to the XZ stream and file format standard, including for the decoding
|
||||
of Variable Length Integers (VLI). This includes definitions for the stream
|
||||
header, block header, index and stream footer, and associated check types.
|
||||
|
||||
Author:
|
||||
|
||||
Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
|
||||
|
||||
Environment:
|
||||
|
||||
Windows & Linux, user mode and kernel mode.
|
||||
|
||||
--*/
|
||||
|
||||
#pragma once
|
||||
#pragma warning(disable:4214)
|
||||
|
||||
//
|
||||
// XZ streams encode certain numbers as "variable length integers", with 7 bits
|
||||
// for the data, and a high bit to encode that another byte must be consumed.
|
||||
//
|
||||
typedef uint32_t vli_type;
|
||||
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
|
||||
|
||||
//
|
||||
// These are the possible supported types for integrity checking in an XZ file
|
||||
//
|
||||
typedef enum _XZ_CHECK_TYPES
|
||||
{
|
||||
XzCheckTypeNone = 0,
|
||||
XzCheckTypeCrc32 = 1,
|
||||
XzCheckTypeCrc64 = 4,
|
||||
XzCheckTypeSha2 = 10
|
||||
} XZ_CHECK_TYPES;
|
||||
|
||||
//
|
||||
// This describes the first 12 bytes of any XZ container file / stream
|
||||
//
|
||||
typedef struct _XZ_STREAM_HEADER
|
||||
{
|
||||
uint8_t Magic[6];
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t ReservedFlags;
|
||||
uint8_t CheckType : 4;
|
||||
uint8_t ReservedType : 4;
|
||||
} s;
|
||||
uint16_t Flags;
|
||||
} u;
|
||||
uint32_t Crc32;
|
||||
} XZ_STREAM_HEADER, * PXZ_STREAM_HEADER;
|
||||
static_assert(sizeof(XZ_STREAM_HEADER) == 12, "Invalid Stream Header Size");
|
||||
|
||||
//
|
||||
// This describes the last 12 bytes of any XZ container file / stream
|
||||
//
|
||||
typedef struct _XZ_STREAM_FOOTER
|
||||
{
|
||||
uint32_t Crc32;
|
||||
uint32_t BackwardSize;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t ReservedFlags;
|
||||
uint8_t CheckType : 4;
|
||||
uint8_t ReservedType : 4;
|
||||
} s;
|
||||
uint16_t Flags;
|
||||
} u;
|
||||
uint16_t Magic;
|
||||
} XZ_STREAM_FOOTER, * PXZ_STREAM_FOOTER;
|
||||
static_assert(sizeof(XZ_STREAM_FOOTER) == 12, "Invalid Stream Footer Size");
|
||||
|
||||
//
|
||||
// This describes the beginning of a compressed payload stored in an XZ stream,
|
||||
// with hardcoded expectations for an LZMA2-compressed payload that has 0 extra
|
||||
// filters (such as BCJ2).
|
||||
//
|
||||
typedef struct _XZ_BLOCK_HEADER
|
||||
{
|
||||
uint8_t Size;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t FilterCount : 2;
|
||||
uint8_t Reserved : 4;
|
||||
uint8_t HasCompressedSize : 1;
|
||||
uint8_t HasUncompressedSize : 1;
|
||||
} s;
|
||||
uint8_t Flags;
|
||||
} u;
|
||||
struct
|
||||
{
|
||||
uint8_t Id;
|
||||
uint8_t Size;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t DictionarySize : 6;
|
||||
uint8_t Reserved : 2;
|
||||
} s;
|
||||
uint8_t Properties;
|
||||
} u;
|
||||
} LzmaFlags;
|
||||
uint8_t Padding[3];
|
||||
uint32_t Crc32;
|
||||
} XZ_BLOCK_HEADER, * PXZ_BLOCK_HEADER;
|
||||
static_assert(sizeof(XZ_BLOCK_HEADER) == 12, "Invalid Block Header Size");
|
32
src/minlzma.h
Normal file
32
src/minlzma.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*!
|
||||
* @brief Decompresses an XZ stream from InputBuffer into OutputBuffer.
|
||||
*
|
||||
* @detail The XZ stream must contain a single block with an LZMA2 filter
|
||||
* and no BJC2 filters, using default LZMA properties, and using
|
||||
* either CRC32 or None as the checksum type.
|
||||
*
|
||||
* @param[in] InputBuffer - A fully formed buffer containing the XZ stream.
|
||||
* @param[in] InputSize - The size of the input buffer.
|
||||
* @param[in] OutputBuffer - A fully allocated buffer to receive the output.
|
||||
* Callers can pass in NULL if they do not intend to decompress,
|
||||
* in combination with setting OutputSize to 0, in order to query
|
||||
* the final expected size of the decompressed buffer.
|
||||
* @param[in,out] OutputSize - On input, the size of the buffer. On output, the
|
||||
* size of the decompressed result.
|
||||
*
|
||||
* @return true - The input buffer was fully decompressed in OutputBuffer,
|
||||
* or no decompression was requested, the size of the decompressed
|
||||
* buffer was returned in OutputSIze.
|
||||
* false - A failure occurred during the decompression process.
|
||||
*/
|
||||
bool
|
||||
XzDecode (
|
||||
uint8_t* InputBuffer,
|
||||
uint32_t InputSize,
|
||||
uint8_t* OutputBuffer,
|
||||
uint32_t* OutputSize
|
||||
);
|
24
src/proxy.c
24
src/proxy.c
|
@ -1,5 +1,7 @@
|
|||
#include "proxy.h"
|
||||
#include "memory.h"
|
||||
#include "minlzma.h"
|
||||
#include "tinf/tinf.h"
|
||||
#include "types.h"
|
||||
#include "uart.h"
|
||||
#include "utils.h"
|
||||
|
@ -178,6 +180,28 @@ int proxy_process(ProxyRequest *request, ProxyReply *reply)
|
|||
dc_civac_range((void *)request->args[0], request->args[1]);
|
||||
break;
|
||||
|
||||
case P_XZDEC: {
|
||||
u32 output_size = request->args[3];
|
||||
if (XzDecode((void *)request->args[0], request->args[1],
|
||||
(void *)request->args[2], &output_size))
|
||||
reply->retval = output_size;
|
||||
else
|
||||
reply->retval = ~0L;
|
||||
break;
|
||||
}
|
||||
case P_GZDEC: {
|
||||
unsigned int destlen, srclen;
|
||||
destlen = request->args[3];
|
||||
srclen = request->args[1];
|
||||
size_t ret = tinf_gzip_uncompress((void *)request->args[2], &destlen,
|
||||
(void *)request->args[0], srclen);
|
||||
if (ret != TINF_OK)
|
||||
reply->retval = ret;
|
||||
else
|
||||
reply->retval = destlen;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
reply->status = S_BADCMD;
|
||||
break;
|
||||
|
|
|
@ -53,6 +53,9 @@ typedef enum {
|
|||
P_DC_CVAU,
|
||||
P_DC_CIVAC,
|
||||
|
||||
P_XZDEC = 0x400,
|
||||
P_GZDEC,
|
||||
|
||||
} ProxyOp;
|
||||
|
||||
#define S_OK 0
|
||||
|
|
95
src/tinf/adler32.c
Normal file
95
src/tinf/adler32.c
Normal file
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Adler-32 checksum
|
||||
*
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Adler-32 algorithm taken from the zlib source, which is
|
||||
* Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
|
||||
*/
|
||||
|
||||
#include "tinf.h"
|
||||
|
||||
#define A32_BASE 65521
|
||||
#define A32_NMAX 5552
|
||||
|
||||
unsigned int tinf_adler32(const void *data, unsigned int length)
|
||||
{
|
||||
const unsigned char *buf = (const unsigned char *) data;
|
||||
|
||||
unsigned int s1 = 1;
|
||||
unsigned int s2 = 0;
|
||||
|
||||
while (length > 0) {
|
||||
int k = length < A32_NMAX ? length : A32_NMAX;
|
||||
int i;
|
||||
|
||||
for (i = k / 16; i; --i, buf += 16) {
|
||||
s1 += buf[0];
|
||||
s2 += s1;
|
||||
s1 += buf[1];
|
||||
s2 += s1;
|
||||
s1 += buf[2];
|
||||
s2 += s1;
|
||||
s1 += buf[3];
|
||||
s2 += s1;
|
||||
s1 += buf[4];
|
||||
s2 += s1;
|
||||
s1 += buf[5];
|
||||
s2 += s1;
|
||||
s1 += buf[6];
|
||||
s2 += s1;
|
||||
s1 += buf[7];
|
||||
s2 += s1;
|
||||
|
||||
s1 += buf[8];
|
||||
s2 += s1;
|
||||
s1 += buf[9];
|
||||
s2 += s1;
|
||||
s1 += buf[10];
|
||||
s2 += s1;
|
||||
s1 += buf[11];
|
||||
s2 += s1;
|
||||
s1 += buf[12];
|
||||
s2 += s1;
|
||||
s1 += buf[13];
|
||||
s2 += s1;
|
||||
s1 += buf[14];
|
||||
s2 += s1;
|
||||
s1 += buf[15];
|
||||
s2 += s1;
|
||||
}
|
||||
|
||||
for (i = k % 16; i; --i) {
|
||||
s1 += *buf++;
|
||||
s2 += s1;
|
||||
}
|
||||
|
||||
s1 %= A32_BASE;
|
||||
s2 %= A32_BASE;
|
||||
|
||||
length -= k;
|
||||
}
|
||||
|
||||
return (s2 << 16) | s1;
|
||||
}
|
57
src/tinf/crc32.c
Normal file
57
src/tinf/crc32.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* CRC32 checksum
|
||||
*
|
||||
* Copyright (c) 1998-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* CRC32 algorithm taken from the zlib source, which is
|
||||
* Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
|
||||
*/
|
||||
|
||||
#include "tinf.h"
|
||||
|
||||
static const unsigned int tinf_crc32tab[16] = {
|
||||
0x00000000, 0x1DB71064, 0x3B6E20C8, 0x26D930AC, 0x76DC4190,
|
||||
0x6B6B51F4, 0x4DB26158, 0x5005713C, 0xEDB88320, 0xF00F9344,
|
||||
0xD6D6A3E8, 0xCB61B38C, 0x9B64C2B0, 0x86D3D2D4, 0xA00AE278,
|
||||
0xBDBDF21C
|
||||
};
|
||||
|
||||
unsigned int tinf_crc32(const void *data, unsigned int length)
|
||||
{
|
||||
const unsigned char *buf = (const unsigned char *) data;
|
||||
unsigned int crc = 0xFFFFFFFF;
|
||||
unsigned int i;
|
||||
|
||||
if (length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < length; ++i) {
|
||||
crc ^= buf[i];
|
||||
crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4);
|
||||
crc = tinf_crc32tab[crc & 0x0F] ^ (crc >> 4);
|
||||
}
|
||||
|
||||
return crc ^ 0xFFFFFFFF;
|
||||
}
|
140
src/tinf/tinf.h
Normal file
140
src/tinf/tinf.h
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* tinf - tiny inflate library (inflate, gzip, zlib)
|
||||
*
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
#ifndef TINF_H_INCLUDED
|
||||
#define TINF_H_INCLUDED
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TINF_VER_MAJOR 1 /**< Major version number */
|
||||
#define TINF_VER_MINOR 2 /**< Minor version number */
|
||||
#define TINF_VER_PATCH 1 /**< Patch version number */
|
||||
#define TINF_VER_STRING "1.2.1" /**< Version number as a string */
|
||||
|
||||
#ifndef TINFCC
|
||||
# ifdef __WATCOMC__
|
||||
# define TINFCC __cdecl
|
||||
# else
|
||||
# define TINFCC
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Status codes returned.
|
||||
*
|
||||
* @see tinf_uncompress, tinf_gzip_uncompress, tinf_zlib_uncompress
|
||||
*/
|
||||
typedef enum {
|
||||
TINF_OK = 0, /**< Success */
|
||||
TINF_DATA_ERROR = -3, /**< Input error */
|
||||
TINF_BUF_ERROR = -5 /**< Not enough room for output */
|
||||
} tinf_error_code;
|
||||
|
||||
/**
|
||||
* Initialize global data used by tinf.
|
||||
*
|
||||
* @deprecated No longer required, may be removed in a future version.
|
||||
*/
|
||||
void TINFCC tinf_init(void);
|
||||
|
||||
/**
|
||||
* Decompress `sourceLen` bytes of deflate data from `source` to `dest`.
|
||||
*
|
||||
* The variable `destLen` points to must contain the size of `dest` on entry,
|
||||
* and will be set to the size of the decompressed data on success.
|
||||
*
|
||||
* Reads at most `sourceLen` bytes from `source`.
|
||||
* Writes at most `*destLen` bytes to `dest`.
|
||||
*
|
||||
* @param dest pointer to where to place decompressed data
|
||||
* @param destLen pointer to variable containing size of `dest`
|
||||
* @param source pointer to compressed data
|
||||
* @param sourceLen size of compressed data
|
||||
* @return `TINF_OK` on success, error code on error
|
||||
*/
|
||||
int TINFCC tinf_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen);
|
||||
|
||||
/**
|
||||
* Decompress `sourceLen` bytes of gzip data from `source` to `dest`.
|
||||
*
|
||||
* The variable `destLen` points to must contain the size of `dest` on entry,
|
||||
* and will be set to the size of the decompressed data on success.
|
||||
*
|
||||
* Reads at most `sourceLen` bytes from `source`.
|
||||
* Writes at most `*destLen` bytes to `dest`.
|
||||
*
|
||||
* @param dest pointer to where to place decompressed data
|
||||
* @param destLen pointer to variable containing size of `dest`
|
||||
* @param source pointer to compressed data
|
||||
* @param sourceLen size of compressed data
|
||||
* @return `TINF_OK` on success, error code on error
|
||||
*/
|
||||
int TINFCC tinf_gzip_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen);
|
||||
|
||||
/**
|
||||
* Decompress `sourceLen` bytes of zlib data from `source` to `dest`.
|
||||
*
|
||||
* The variable `destLen` points to must contain the size of `dest` on entry,
|
||||
* and will be set to the size of the decompressed data on success.
|
||||
*
|
||||
* Reads at most `sourceLen` bytes from `source`.
|
||||
* Writes at most `*destLen` bytes to `dest`.
|
||||
*
|
||||
* @param dest pointer to where to place decompressed data
|
||||
* @param destLen pointer to variable containing size of `dest`
|
||||
* @param source pointer to compressed data
|
||||
* @param sourceLen size of compressed data
|
||||
* @return `TINF_OK` on success, error code on error
|
||||
*/
|
||||
int TINFCC tinf_zlib_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen);
|
||||
|
||||
/**
|
||||
* Compute Adler-32 checksum of `length` bytes starting at `data`.
|
||||
*
|
||||
* @param data pointer to data
|
||||
* @param length size of data
|
||||
* @return Adler-32 checksum
|
||||
*/
|
||||
unsigned int TINFCC tinf_adler32(const void *data, unsigned int length);
|
||||
|
||||
/**
|
||||
* Compute CRC32 checksum of `length` bytes starting at `data`.
|
||||
*
|
||||
* @param data pointer to data
|
||||
* @param length size of data
|
||||
* @return CRC32 checksum
|
||||
*/
|
||||
unsigned int TINFCC tinf_crc32(const void *data, unsigned int length);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* TINF_H_INCLUDED */
|
172
src/tinf/tinfgzip.c
Normal file
172
src/tinf/tinfgzip.c
Normal file
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* tinfgzip - tiny gzip decompressor
|
||||
*
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
#include "tinf.h"
|
||||
|
||||
typedef enum {
|
||||
FTEXT = 1,
|
||||
FHCRC = 2,
|
||||
FEXTRA = 4,
|
||||
FNAME = 8,
|
||||
FCOMMENT = 16
|
||||
} tinf_gzip_flag;
|
||||
|
||||
static unsigned int read_le16(const unsigned char *p)
|
||||
{
|
||||
return ((unsigned int) p[0])
|
||||
| ((unsigned int) p[1] << 8);
|
||||
}
|
||||
|
||||
static unsigned int read_le32(const unsigned char *p)
|
||||
{
|
||||
return ((unsigned int) p[0])
|
||||
| ((unsigned int) p[1] << 8)
|
||||
| ((unsigned int) p[2] << 16)
|
||||
| ((unsigned int) p[3] << 24);
|
||||
}
|
||||
|
||||
int tinf_gzip_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen)
|
||||
{
|
||||
const unsigned char *src = (const unsigned char *) source;
|
||||
unsigned char *dst = (unsigned char *) dest;
|
||||
const unsigned char *start;
|
||||
unsigned int dlen, crc32;
|
||||
int res;
|
||||
unsigned char flg;
|
||||
|
||||
/* -- Check header -- */
|
||||
|
||||
/* Check room for at least 10 byte header and 8 byte trailer */
|
||||
if (sourceLen < 18) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Check id bytes */
|
||||
if (src[0] != 0x1F || src[1] != 0x8B) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Check method is deflate */
|
||||
if (src[2] != 8) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Get flag byte */
|
||||
flg = src[3];
|
||||
|
||||
/* Check that reserved bits are zero */
|
||||
if (flg & 0xE0) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* -- Find start of compressed data -- */
|
||||
|
||||
/* Skip base header of 10 bytes */
|
||||
start = src + 10;
|
||||
|
||||
/* Skip extra data if present */
|
||||
if (flg & FEXTRA) {
|
||||
unsigned int xlen = read_le16(start);
|
||||
|
||||
if (xlen > sourceLen - 12) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
start += xlen + 2;
|
||||
}
|
||||
|
||||
/* Skip file name if present */
|
||||
if (flg & FNAME) {
|
||||
do {
|
||||
if (start - src >= sourceLen) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
} while (*start++);
|
||||
}
|
||||
|
||||
/* Skip file comment if present */
|
||||
if (flg & FCOMMENT) {
|
||||
do {
|
||||
if (start - src >= sourceLen) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
} while (*start++);
|
||||
}
|
||||
|
||||
/* Check header crc if present */
|
||||
if (flg & FHCRC) {
|
||||
unsigned int hcrc;
|
||||
|
||||
if (start - src > sourceLen - 2) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
hcrc = read_le16(start);
|
||||
|
||||
if (hcrc != (tinf_crc32(src, start - src) & 0x0000FFFF)) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
start += 2;
|
||||
}
|
||||
|
||||
/* -- Get decompressed length -- */
|
||||
|
||||
dlen = read_le32(&src[sourceLen - 4]);
|
||||
|
||||
if (dlen > *destLen) {
|
||||
return TINF_BUF_ERROR;
|
||||
}
|
||||
|
||||
/* -- Get CRC32 checksum of original data -- */
|
||||
|
||||
crc32 = read_le32(&src[sourceLen - 8]);
|
||||
|
||||
/* -- Decompress data -- */
|
||||
|
||||
if ((src + sourceLen) - start < 8) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
res = tinf_uncompress(dst, destLen, start,
|
||||
(src + sourceLen) - start - 8);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
if (*destLen != dlen) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* -- Check CRC32 checksum -- */
|
||||
|
||||
if (crc32 != tinf_crc32(dst, dlen)) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
return TINF_OK;
|
||||
}
|
636
src/tinf/tinflate.c
Normal file
636
src/tinf/tinflate.c
Normal file
|
@ -0,0 +1,636 @@
|
|||
/*
|
||||
* tinflate - tiny inflate
|
||||
*
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
#include "tinf.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL
|
||||
# error "tinf requires unsigned int to be at least 32-bit"
|
||||
#endif
|
||||
|
||||
/* -- Internal data structures -- */
|
||||
|
||||
struct tinf_tree {
|
||||
unsigned short counts[16]; /* Number of codes with a given length */
|
||||
unsigned short symbols[288]; /* Symbols sorted by code */
|
||||
int max_sym;
|
||||
};
|
||||
|
||||
struct tinf_data {
|
||||
const unsigned char *source;
|
||||
const unsigned char *source_end;
|
||||
unsigned int tag;
|
||||
int bitcount;
|
||||
int overflow;
|
||||
|
||||
unsigned char *dest_start;
|
||||
unsigned char *dest;
|
||||
unsigned char *dest_end;
|
||||
|
||||
struct tinf_tree ltree; /* Literal/length tree */
|
||||
struct tinf_tree dtree; /* Distance tree */
|
||||
};
|
||||
|
||||
/* -- Utility functions -- */
|
||||
|
||||
static unsigned int read_le16(const unsigned char *p)
|
||||
{
|
||||
return ((unsigned int) p[0])
|
||||
| ((unsigned int) p[1] << 8);
|
||||
}
|
||||
|
||||
/* Build fixed Huffman trees */
|
||||
static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Build fixed literal/length tree */
|
||||
for (i = 0; i < 16; ++i) {
|
||||
lt->counts[i] = 0;
|
||||
}
|
||||
|
||||
lt->counts[7] = 24;
|
||||
lt->counts[8] = 152;
|
||||
lt->counts[9] = 112;
|
||||
|
||||
for (i = 0; i < 24; ++i) {
|
||||
lt->symbols[i] = 256 + i;
|
||||
}
|
||||
for (i = 0; i < 144; ++i) {
|
||||
lt->symbols[24 + i] = i;
|
||||
}
|
||||
for (i = 0; i < 8; ++i) {
|
||||
lt->symbols[24 + 144 + i] = 280 + i;
|
||||
}
|
||||
for (i = 0; i < 112; ++i) {
|
||||
lt->symbols[24 + 144 + 8 + i] = 144 + i;
|
||||
}
|
||||
|
||||
lt->max_sym = 285;
|
||||
|
||||
/* Build fixed distance tree */
|
||||
for (i = 0; i < 16; ++i) {
|
||||
dt->counts[i] = 0;
|
||||
}
|
||||
|
||||
dt->counts[5] = 32;
|
||||
|
||||
for (i = 0; i < 32; ++i) {
|
||||
dt->symbols[i] = i;
|
||||
}
|
||||
|
||||
dt->max_sym = 29;
|
||||
}
|
||||
|
||||
/* Given an array of code lengths, build a tree */
|
||||
static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths,
|
||||
unsigned int num)
|
||||
{
|
||||
unsigned short offs[16];
|
||||
unsigned int i, num_codes, available;
|
||||
|
||||
assert(num <= 288);
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
t->counts[i] = 0;
|
||||
}
|
||||
|
||||
t->max_sym = -1;
|
||||
|
||||
/* Count number of codes for each non-zero length */
|
||||
for (i = 0; i < num; ++i) {
|
||||
assert(lengths[i] <= 15);
|
||||
|
||||
if (lengths[i]) {
|
||||
t->max_sym = i;
|
||||
t->counts[lengths[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute offset table for distribution sort */
|
||||
for (available = 1, num_codes = 0, i = 0; i < 16; ++i) {
|
||||
unsigned int used = t->counts[i];
|
||||
|
||||
/* Check length contains no more codes than available */
|
||||
if (used > available) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
available = 2 * (available - used);
|
||||
|
||||
offs[i] = num_codes;
|
||||
num_codes += used;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check all codes were used, or for the special case of only one
|
||||
* code that it has length 1
|
||||
*/
|
||||
if ((num_codes > 1 && available > 0)
|
||||
|| (num_codes == 1 && t->counts[1] != 1)) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Fill in symbols sorted by code */
|
||||
for (i = 0; i < num; ++i) {
|
||||
if (lengths[i]) {
|
||||
t->symbols[offs[lengths[i]]++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For the special case of only one code (which will be 0) add a
|
||||
* code 1 which results in a symbol that is too large
|
||||
*/
|
||||
if (num_codes == 1) {
|
||||
t->counts[1] = 2;
|
||||
t->symbols[1] = t->max_sym + 1;
|
||||
}
|
||||
|
||||
return TINF_OK;
|
||||
}
|
||||
|
||||
/* -- Decode functions -- */
|
||||
|
||||
static void tinf_refill(struct tinf_data *d, int num)
|
||||
{
|
||||
assert(num >= 0 && num <= 32);
|
||||
|
||||
/* Read bytes until at least num bits available */
|
||||
while (d->bitcount < num) {
|
||||
if (d->source != d->source_end) {
|
||||
d->tag |= (unsigned int) *d->source++ << d->bitcount;
|
||||
}
|
||||
else {
|
||||
d->overflow = 1;
|
||||
}
|
||||
d->bitcount += 8;
|
||||
}
|
||||
|
||||
assert(d->bitcount <= 32);
|
||||
}
|
||||
|
||||
static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num)
|
||||
{
|
||||
unsigned int bits;
|
||||
|
||||
assert(num >= 0 && num <= d->bitcount);
|
||||
|
||||
/* Get bits from tag */
|
||||
bits = d->tag & ((1UL << num) - 1);
|
||||
|
||||
/* Remove bits from tag */
|
||||
d->tag >>= num;
|
||||
d->bitcount -= num;
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
/* Get num bits from source stream */
|
||||
static unsigned int tinf_getbits(struct tinf_data *d, int num)
|
||||
{
|
||||
tinf_refill(d, num);
|
||||
return tinf_getbits_no_refill(d, num);
|
||||
}
|
||||
|
||||
/* Read a num bit value from stream and add base */
|
||||
static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base)
|
||||
{
|
||||
return base + (num ? tinf_getbits(d, num) : 0);
|
||||
}
|
||||
|
||||
/* Given a data stream and a tree, decode a symbol */
|
||||
static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t)
|
||||
{
|
||||
int base = 0, offs = 0;
|
||||
int len;
|
||||
|
||||
/*
|
||||
* Get more bits while code index is above number of codes
|
||||
*
|
||||
* Rather than the actual code, we are computing the position of the
|
||||
* code in the sorted order of codes, which is the index of the
|
||||
* corresponding symbol.
|
||||
*
|
||||
* Conceptually, for each code length (level in the tree), there are
|
||||
* counts[len] leaves on the left and internal nodes on the right.
|
||||
* The index we have decoded so far is base + offs, and if that
|
||||
* falls within the leaves we are done. Otherwise we adjust the range
|
||||
* of offs and add one more bit to it.
|
||||
*/
|
||||
for (len = 1; ; ++len) {
|
||||
offs = 2 * offs + tinf_getbits(d, 1);
|
||||
|
||||
assert(len <= 15);
|
||||
|
||||
if (offs < t->counts[len]) {
|
||||
break;
|
||||
}
|
||||
|
||||
base += t->counts[len];
|
||||
offs -= t->counts[len];
|
||||
}
|
||||
|
||||
assert(base + offs >= 0 && base + offs < 288);
|
||||
|
||||
return t->symbols[base + offs];
|
||||
}
|
||||
|
||||
/* Given a data stream, decode dynamic trees from it */
|
||||
static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt,
|
||||
struct tinf_tree *dt)
|
||||
{
|
||||
unsigned char lengths[288 + 32];
|
||||
|
||||
/* Special ordering of code length codes */
|
||||
static const unsigned char clcidx[19] = {
|
||||
16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
|
||||
11, 4, 12, 3, 13, 2, 14, 1, 15
|
||||
};
|
||||
|
||||
unsigned int hlit, hdist, hclen;
|
||||
unsigned int i, num, length;
|
||||
int res;
|
||||
|
||||
/* Get 5 bits HLIT (257-286) */
|
||||
hlit = tinf_getbits_base(d, 5, 257);
|
||||
|
||||
/* Get 5 bits HDIST (1-32) */
|
||||
hdist = tinf_getbits_base(d, 5, 1);
|
||||
|
||||
/* Get 4 bits HCLEN (4-19) */
|
||||
hclen = tinf_getbits_base(d, 4, 4);
|
||||
|
||||
/*
|
||||
* The RFC limits the range of HLIT to 286, but lists HDIST as range
|
||||
* 1-32, even though distance codes 30 and 31 have no meaning. While
|
||||
* we could allow the full range of HLIT and HDIST to make it possible
|
||||
* to decode the fixed trees with this function, we consider it an
|
||||
* error here.
|
||||
*
|
||||
* See also: https://github.com/madler/zlib/issues/82
|
||||
*/
|
||||
if (hlit > 286 || hdist > 30) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
for (i = 0; i < 19; ++i) {
|
||||
lengths[i] = 0;
|
||||
}
|
||||
|
||||
/* Read code lengths for code length alphabet */
|
||||
for (i = 0; i < hclen; ++i) {
|
||||
/* Get 3 bits code length (0-7) */
|
||||
unsigned int clen = tinf_getbits(d, 3);
|
||||
|
||||
lengths[clcidx[i]] = clen;
|
||||
}
|
||||
|
||||
/* Build code length tree (in literal/length tree to save space) */
|
||||
res = tinf_build_tree(lt, lengths, 19);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Check code length tree is not empty */
|
||||
if (lt->max_sym == -1) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Decode code lengths for the dynamic trees */
|
||||
for (num = 0; num < hlit + hdist; ) {
|
||||
int sym = tinf_decode_symbol(d, lt);
|
||||
|
||||
if (sym > lt->max_sym) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
switch (sym) {
|
||||
case 16:
|
||||
/* Copy previous code length 3-6 times (read 2 bits) */
|
||||
if (num == 0) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
sym = lengths[num - 1];
|
||||
length = tinf_getbits_base(d, 2, 3);
|
||||
break;
|
||||
case 17:
|
||||
/* Repeat code length 0 for 3-10 times (read 3 bits) */
|
||||
sym = 0;
|
||||
length = tinf_getbits_base(d, 3, 3);
|
||||
break;
|
||||
case 18:
|
||||
/* Repeat code length 0 for 11-138 times (read 7 bits) */
|
||||
sym = 0;
|
||||
length = tinf_getbits_base(d, 7, 11);
|
||||
break;
|
||||
default:
|
||||
/* Values 0-15 represent the actual code lengths */
|
||||
length = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (length > hlit + hdist - num) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
while (length--) {
|
||||
lengths[num++] = sym;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check EOB symbol is present */
|
||||
if (lengths[256] == 0) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Build dynamic trees */
|
||||
res = tinf_build_tree(lt, lengths, hlit);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
res = tinf_build_tree(dt, lengths + hlit, hdist);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
return TINF_OK;
|
||||
}
|
||||
|
||||
/* -- Block inflate functions -- */
|
||||
|
||||
/* Given a stream and two trees, inflate a block of data */
|
||||
static int tinf_inflate_block_data(struct tinf_data *d, struct tinf_tree *lt,
|
||||
struct tinf_tree *dt)
|
||||
{
|
||||
/* Extra bits and base tables for length codes */
|
||||
static const unsigned char length_bits[30] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 5, 5, 5, 5, 0, 127
|
||||
};
|
||||
|
||||
static const unsigned short length_base[30] = {
|
||||
3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
|
||||
15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
|
||||
67, 83, 99, 115, 131, 163, 195, 227, 258, 0
|
||||
};
|
||||
|
||||
/* Extra bits and base tables for distance codes */
|
||||
static const unsigned char dist_bits[30] = {
|
||||
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
||||
9, 9, 10, 10, 11, 11, 12, 12, 13, 13
|
||||
};
|
||||
|
||||
static const unsigned short dist_base[30] = {
|
||||
1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
|
||||
33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
|
||||
1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
int sym = tinf_decode_symbol(d, lt);
|
||||
|
||||
/* Check for overflow in bit reader */
|
||||
if (d->overflow) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
if (sym < 256) {
|
||||
if (d->dest == d->dest_end) {
|
||||
return TINF_BUF_ERROR;
|
||||
}
|
||||
*d->dest++ = sym;
|
||||
}
|
||||
else {
|
||||
int length, dist, offs;
|
||||
int i;
|
||||
|
||||
/* Check for end of block */
|
||||
if (sym == 256) {
|
||||
return TINF_OK;
|
||||
}
|
||||
|
||||
/* Check sym is within range and distance tree is not empty */
|
||||
if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
sym -= 257;
|
||||
|
||||
/* Possibly get more bits from length code */
|
||||
length = tinf_getbits_base(d, length_bits[sym],
|
||||
length_base[sym]);
|
||||
|
||||
dist = tinf_decode_symbol(d, dt);
|
||||
|
||||
/* Check dist is within range */
|
||||
if (dist > dt->max_sym || dist > 29) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Possibly get more bits from distance code */
|
||||
offs = tinf_getbits_base(d, dist_bits[dist],
|
||||
dist_base[dist]);
|
||||
|
||||
if (offs > d->dest - d->dest_start) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
if (d->dest_end - d->dest < length) {
|
||||
return TINF_BUF_ERROR;
|
||||
}
|
||||
|
||||
/* Copy match */
|
||||
for (i = 0; i < length; ++i) {
|
||||
d->dest[i] = d->dest[i - offs];
|
||||
}
|
||||
|
||||
d->dest += length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Inflate an uncompressed block of data */
|
||||
static int tinf_inflate_uncompressed_block(struct tinf_data *d)
|
||||
{
|
||||
unsigned int length, invlength;
|
||||
|
||||
if (d->source_end - d->source < 4) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Get length */
|
||||
length = read_le16(d->source);
|
||||
|
||||
/* Get one's complement of length */
|
||||
invlength = read_le16(d->source + 2);
|
||||
|
||||
/* Check length */
|
||||
if (length != (~invlength & 0x0000FFFF)) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
d->source += 4;
|
||||
|
||||
if (d->source_end - d->source < length) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
if (d->dest_end - d->dest < length) {
|
||||
return TINF_BUF_ERROR;
|
||||
}
|
||||
|
||||
/* Copy block */
|
||||
while (length--) {
|
||||
*d->dest++ = *d->source++;
|
||||
}
|
||||
|
||||
/* Make sure we start next block on a byte boundary */
|
||||
d->tag = 0;
|
||||
d->bitcount = 0;
|
||||
|
||||
return TINF_OK;
|
||||
}
|
||||
|
||||
/* Inflate a block of data compressed with fixed Huffman trees */
|
||||
static int tinf_inflate_fixed_block(struct tinf_data *d)
|
||||
{
|
||||
/* Build fixed Huffman trees */
|
||||
tinf_build_fixed_trees(&d->ltree, &d->dtree);
|
||||
|
||||
/* Decode block using fixed trees */
|
||||
return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
|
||||
}
|
||||
|
||||
/* Inflate a block of data compressed with dynamic Huffman trees */
|
||||
static int tinf_inflate_dynamic_block(struct tinf_data *d)
|
||||
{
|
||||
/* Decode trees from stream */
|
||||
int res = tinf_decode_trees(d, &d->ltree, &d->dtree);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Decode block using decoded trees */
|
||||
return tinf_inflate_block_data(d, &d->ltree, &d->dtree);
|
||||
}
|
||||
|
||||
/* -- Public functions -- */
|
||||
|
||||
/* Initialize global (static) data */
|
||||
void tinf_init(void)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* Inflate stream from source to dest */
|
||||
int tinf_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen)
|
||||
{
|
||||
struct tinf_data d;
|
||||
int bfinal;
|
||||
|
||||
/* Initialise data */
|
||||
d.source = (const unsigned char *) source;
|
||||
d.source_end = d.source + sourceLen;
|
||||
d.tag = 0;
|
||||
d.bitcount = 0;
|
||||
d.overflow = 0;
|
||||
|
||||
d.dest = (unsigned char *) dest;
|
||||
d.dest_start = d.dest;
|
||||
d.dest_end = d.dest + *destLen;
|
||||
|
||||
do {
|
||||
unsigned int btype;
|
||||
int res;
|
||||
|
||||
/* Read final block flag */
|
||||
bfinal = tinf_getbits(&d, 1);
|
||||
|
||||
/* Read block type (2 bits) */
|
||||
btype = tinf_getbits(&d, 2);
|
||||
|
||||
/* Decompress block */
|
||||
switch (btype) {
|
||||
case 0:
|
||||
/* Decompress uncompressed block */
|
||||
res = tinf_inflate_uncompressed_block(&d);
|
||||
break;
|
||||
case 1:
|
||||
/* Decompress block with fixed Huffman trees */
|
||||
res = tinf_inflate_fixed_block(&d);
|
||||
break;
|
||||
case 2:
|
||||
/* Decompress block with dynamic Huffman trees */
|
||||
res = tinf_inflate_dynamic_block(&d);
|
||||
break;
|
||||
default:
|
||||
res = TINF_DATA_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return res;
|
||||
}
|
||||
} while (!bfinal);
|
||||
|
||||
/* Check for overflow in bit reader */
|
||||
if (d.overflow) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
*destLen = d.dest - d.dest_start;
|
||||
|
||||
return TINF_OK;
|
||||
}
|
||||
|
||||
/* clang -g -O1 -fsanitize=fuzzer,address -DTINF_FUZZING tinflate.c */
|
||||
#if defined(TINF_FUZZING)
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
unsigned char depacked[64 * 1024];
|
||||
|
||||
extern int
|
||||
LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
|
||||
{
|
||||
if (size > UINT_MAX / 2) { return 0; }
|
||||
unsigned int destLen = sizeof(depacked);
|
||||
tinf_uncompress(depacked, &destLen, data, size);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
95
src/tinf/tinfzlib.c
Normal file
95
src/tinf/tinfzlib.c
Normal file
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* tinfzlib - tiny zlib decompressor
|
||||
*
|
||||
* Copyright (c) 2003-2019 Joergen Ibsen
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgment in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
*
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
*
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*/
|
||||
|
||||
#include "tinf.h"
|
||||
|
||||
static unsigned int read_be32(const unsigned char *p)
|
||||
{
|
||||
return ((unsigned int) p[0] << 24)
|
||||
| ((unsigned int) p[1] << 16)
|
||||
| ((unsigned int) p[2] << 8)
|
||||
| ((unsigned int) p[3]);
|
||||
}
|
||||
|
||||
int tinf_zlib_uncompress(void *dest, unsigned int *destLen,
|
||||
const void *source, unsigned int sourceLen)
|
||||
{
|
||||
const unsigned char *src = (const unsigned char *) source;
|
||||
unsigned char *dst = (unsigned char *) dest;
|
||||
unsigned int a32;
|
||||
int res;
|
||||
unsigned char cmf, flg;
|
||||
|
||||
/* -- Check header -- */
|
||||
|
||||
/* Check room for at least 2 byte header and 4 byte trailer */
|
||||
if (sourceLen < 6) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Get header bytes */
|
||||
cmf = src[0];
|
||||
flg = src[1];
|
||||
|
||||
/* Check checksum */
|
||||
if ((256 * cmf + flg) % 31) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Check method is deflate */
|
||||
if ((cmf & 0x0F) != 8) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Check window size is valid */
|
||||
if ((cmf >> 4) > 7) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* Check there is no preset dictionary */
|
||||
if (flg & 0x20) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* -- Get Adler-32 checksum of original data -- */
|
||||
|
||||
a32 = read_be32(&src[sourceLen - 4]);
|
||||
|
||||
/* -- Decompress data -- */
|
||||
|
||||
res = tinf_uncompress(dst, destLen, src + 2, sourceLen - 6);
|
||||
|
||||
if (res != TINF_OK) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* -- Check Adler-32 checksum -- */
|
||||
|
||||
if (a32 != tinf_adler32(dst, *destLen)) {
|
||||
return TINF_DATA_ERROR;
|
||||
}
|
||||
|
||||
return TINF_OK;
|
||||
}
|
Loading…
Reference in a new issue