githaven-fork/vendor/github.com/xi2/xz/dec_bcj.go

462 lines
12 KiB
Go
Raw Normal View History

/*
* Branch/Call/Jump (BCJ) filter decoders
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
/* from linux/lib/xz/xz_dec_bcj.c *************************************/
/*
 * xzDecBCJ holds the state of one BCJ filter decoder. It is allocated by
 * xzDecBCJCreate and must be initialised with xzDecBCJReset before it is
 * handed to xzDecBCJRun.
 */
type xzDecBCJ struct {
/* Type of the BCJ filter being used; bcjApply switches on it */
typ xzFilterID
/*
 * Return value of the next filter in the chain. We need to preserve
 * this information across calls, because we must not call the next
 * filter anymore once it has returned xzStreamEnd
 */
ret xzRet
/*
 * Absolute position relative to the beginning of the uncompressed
 * data (in a single .xz Block). xzDecBCJReset sets it to the start
 * offset and bcjApply advances it by the number of bytes filtered.
 */
pos int
/* x86 filter state, carried by bcjX86Filter from one call to the next */
x86PrevMask uint32
/*
 * Temporary space to hold the variables from xzBuf: xzDecBCJRun saves
 * b.out and b.outPos here while b.out temporarily points to
 * temp.bufArray.
 */
out []byte
outPos int
temp struct {
/* Amount of already filtered data in the beginning of buf */
filtered int
/*
 * Buffer to hold a mix of filtered and unfiltered data. This
 * needs to be big enough to hold Alignment + 2 * Look-ahead:
 *
 * Type       Alignment  Look-ahead
 * x86            1          4
 * PowerPC        4          0
 * IA-64         16          0
 * ARM            4          0
 * ARM-Thumb      2          2
 * SPARC          4          0
 *
 * len(buf) is the number of bytes currently held; xzDecBCJReset
 * sets buf to nil, i.e. empty.
 */
buf []byte // slice buf will be backed by bufArray
bufArray [16]byte
}
}
/*
 * bcjX86TestMSByte reports whether b is 0x00 or 0xff. The x86 filter
 * uses it to test the most significant byte of a memory address in an
 * x86 instruction.
 */
func bcjX86TestMSByte(b byte) bool {
	switch b {
	case 0x00, 0xff:
		return true
	}
	return false
}
/*
 * bcjX86Filter applies the x86 BCJ decoder to buf in place.
 *
 * It scans for bytes equal to 0xE8 or 0xE9 (presumably the x86 CALL/JMP
 * rel32 opcodes) and, when the byte four positions later is 0x00 or
 * 0xff, subtracts the position of the end of that five-byte sequence
 * (s.pos+i+5) from the little-endian 32-bit value following the opcode
 * byte.
 *
 * The last four bytes of buf are only ever used as look-ahead, so
 * nothing is done when len(buf) <= 4. s.x86PrevMask carries the mask
 * state from one call to the next.
 *
 * The return value is the offset at which scanning stopped; bcjApply
 * advances its positions by that amount.
 */
func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
/* Indexed by the 3-bit prevMask: may a candidate be converted at all? */
var maskToAllowedStatus = []bool{
true, true, true, false, true, false, false, false,
}
/* Indexed by the 3-bit prevMask: which byte of the operand to test */
var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
var i int
/* Offset of the previous candidate opcode byte; -1 means none yet */
var prevPos int = -1
var prevMask uint32 = s.x86PrevMask
var src uint32
var dest uint32
var j uint32
var b byte
/* Need one opcode byte plus four operand bytes */
if len(buf) <= 4 {
return 0
}
for i = 0; i < len(buf)-4; i++ {
/* Matches both 0xE8 and 0xE9 */
if buf[i]&0xfe != 0xe8 {
continue
}
/* prevPos temporarily holds the distance to the previous candidate */
prevPos = i - prevPos
if prevPos > 3 {
/* Previous candidate is too far back to matter */
prevMask = 0
} else {
/* Age the mask by the distance and keep only three bits of it */
prevMask = (prevMask << (uint(prevPos) - 1)) & 7
if prevMask != 0 {
b = buf[i+4-int(maskToBitNum[prevMask])]
if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
/* Leave this candidate unconverted, but record it in the mask */
prevPos = i
prevMask = prevMask<<1 | 1
continue
}
}
}
prevPos = i
if bcjX86TestMSByte(buf[i+4]) {
src = getLE32(buf[i+1:])
for {
dest = src - uint32(s.pos+i+5)
if prevMask == 0 {
break
}
/* j is 8, 16 or 24 here because prevMask is in the range 1-7 */
j = uint32(maskToBitNum[prevMask]) * 8
b = byte(dest >> (24 - j))
if !bcjX86TestMSByte(b) {
break
}
/* Flip the low 32-j bits and convert again */
src = dest ^ (1<<(32-j) - 1)
}
/* Keep the low 25 bits, then copy bit 24 into bits 25-31 */
dest &= 0x01FFFFFF
dest |= 0 - dest&0x01000000
putLE32(dest, buf[i+1:])
/* Skip the four operand bytes that were just rewritten */
i += 4
} else {
prevMask = prevMask<<1 | 1
}
}
/* Save the mask, aged by the distance to the stopping point */
prevPos = i - prevPos
if prevPos > 3 {
s.x86PrevMask = 0
} else {
s.x86PrevMask = prevMask << (uint(prevPos) - 1)
}
return i
}
/*
 * bcjPowerPCFilter applies the PowerPC BCJ decoder to buf in place.
 *
 * buf is read as consecutive big-endian 32-bit words. For every word
 * whose bits under 0xFC000003 equal 0x48000001, the position of the
 * word (s.pos plus its offset in buf) is subtracted from the field
 * under 0x03FFFFFC; all other words are left alone.
 *
 * The return value is the number of bytes consumed, always a multiple
 * of four; a trailing partial word is not touched.
 */
func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
	const (
		opMask   = 0xFC000003 /* bits that identify a word to convert */
		opBits   = 0x48000001 /* value of those bits in such a word */
		addrMask = 0x03FFFFFC /* bits holding the address field */
	)

	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		if word&opMask != opBits {
			continue
		}
		addr := (word & addrMask) - uint32(s.pos+off)
		putBE32((addr&addrMask)|opBits, buf[off:])
	}
	return off
}
/*
 * bcjIA64BranchTable is indexed by the low five bits of the first byte
 * of a 16-byte instruction word. Bit n of an entry is set when
 * bcjIA64Filter has to examine instruction slot n of that word.
 * Indices not listed here are zero: no slot is examined.
 */
var bcjIA64BranchTable = [32]byte{
	0x10: 4, 0x11: 4, /* slot 2 */
	0x12: 6, 0x13: 6, /* slots 1 and 2 */
	0x16: 7, 0x17: 7, /* slots 0, 1 and 2 */
	0x18: 4, 0x19: 4, /* slot 2 */
	0x1c: 4, 0x1d: 4, /* slot 2 */
}
/*
 * bcjIA64Filter applies the IA-64 BCJ decoder to buf in place.
 *
 * buf is processed as consecutive 128-bit (16-byte) instruction words.
 * The low five bits of the first byte select, through
 * bcjIA64BranchTable, which of the three 41-bit instruction slots are
 * examined. For each examined slot whose opcode bits match, the 21-bit
 * address field is converted by subtracting the position of the
 * instruction word (s.pos plus its offset in buf).
 *
 * The return value is the number of bytes consumed, always a multiple
 * of 16; a trailing partial word is not touched.
 */
func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+16 <= len(buf); off += 16 {
		bundle := buf[off : off+16]

		/* One bit per slot telling whether the slot is to be examined */
		slotMask := uint32(bcjIA64BranchTable[bundle[0]&0x1f])

		for slot := uint32(0); slot < 3; slot++ {
			if slotMask&(1<<slot) == 0 {
				continue
			}

			/*
			 * The slots start at bit 5 and are 41 bits apart. Split
			 * the bit offset into a byte index and a bit remainder.
			 */
			bitPos := 5 + 41*slot
			first := int(bitPos >> 3)
			shift := bitPos & 7

			/* Six bytes are enough to cover 41 bits plus the remainder */
			window := bundle[first : first+6]
			var raw uint64
			for k, v := range window {
				raw |= uint64(v) << (8 * uint(k))
			}

			/* Instruction normalized so that it starts at bit 0 */
			insn := raw >> shift
			if (insn>>37)&0x0f != 0x05 || (insn>>9)&0x07 != 0 {
				continue
			}

			/* Gather the address: 20 bits at bit 13 plus one at bit 36 */
			addr := uint32((insn >> 13) & 0x0fffff)
			addr |= (uint32(insn>>36) & 1) << 20
			addr = ((addr << 4) - uint32(s.pos+off)) >> 4

			/* Put the converted address back into the same two fields */
			insn &^= uint64(0x8fffff) << 13
			insn |= uint64(addr&0x0fffff) << 13
			insn |= uint64(addr&0x100000) << (36 - 20)

			/* Keep the bits below the slot and store the six bytes back */
			raw &= (uint64(1) << shift) - 1
			raw |= insn << shift
			for k := range window {
				window[k] = byte(raw >> (8 * uint(k)))
			}
		}
	}
	return off
}
/*
 * bcjARMFilter applies the ARM BCJ decoder to buf in place.
 *
 * buf is processed four bytes at a time. When the fourth byte of a
 * group is 0xeb, the first three bytes are taken as a little-endian
 * 24-bit value that is shifted left by two, reduced by the position of
 * the group plus eight (s.pos+offset+8), shifted back and stored again.
 *
 * The return value is the number of bytes consumed, always a multiple
 * of four; a trailing partial group is not touched.
 */
func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		insn := buf[off : off+4]
		if insn[3] != 0xeb {
			continue
		}
		addr := uint32(insn[0]) | uint32(insn[1])<<8 | uint32(insn[2])<<16
		addr = ((addr << 2) - uint32(s.pos+off+8)) >> 2
		insn[0] = byte(addr)
		insn[1] = byte(addr >> 8)
		insn[2] = byte(addr >> 16)
	}
	return off
}
/*
 * bcjARMThumbFilter applies the ARM-Thumb BCJ decoder to buf in place.
 *
 * buf is scanned in steps of two bytes, looking at four bytes at a
 * time. A group matches when the top five bits of its second byte are
 * 0xf0 and those of its fourth byte are 0xf8. The 22-bit value spread
 * over a matching group is shifted left by one, reduced by the position
 * of the group plus four (s.pos+offset+4), shifted back and stored
 * again; scanning then continues after the group.
 *
 * The return value is the offset at which scanning stopped.
 */
func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for off+4 <= len(buf) {
		if buf[off+1]&0xf8 != 0xf0 || buf[off+3]&0xf8 != 0xf8 {
			off += 2
			continue
		}

		hi := uint32(buf[off+1]&0x07)<<19 | uint32(buf[off])<<11
		lo := uint32(buf[off+3]&0x07)<<8 | uint32(buf[off+2])
		addr := (((hi | lo) << 1) - uint32(s.pos+off+4)) >> 1

		buf[off+1] = byte(0xf0 | (addr>>19)&0x07)
		buf[off] = byte(addr >> 11)
		buf[off+3] = byte(0xf8 | (addr>>8)&0x07)
		buf[off+2] = byte(addr)

		/* All four bytes of the group have been handled */
		off += 4
	}
	return off
}
/*
 * bcjSPARCFilter applies the SPARC BCJ decoder to buf in place.
 *
 * buf is read as consecutive big-endian 32-bit words. A word is
 * converted when its top ten bits are 0x100 or 0x1ff: it is shifted
 * left by two, reduced by the position of the word (s.pos plus its
 * offset in buf), shifted back, and then rebuilt from its low 22 bits
 * with bit 22 copied into bits 23-29 and bit 30 set.
 *
 * The return value is the number of bytes consumed, always a multiple
 * of four; a trailing partial word is not touched.
 */
func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		if top := word >> 22; top != 0x100 && top != 0x1ff {
			continue
		}
		word = ((word << 2) - uint32(s.pos+off)) >> 2
		signBits := 0x40000000 - (word & 0x400000)
		putBE32(signBits|0x40000000|(word&0x3FFFFF), buf[off:])
	}
	return off
}
/*
 * bcjApply runs the BCJ filter selected by s.typ over buf[*pos:] and
 * then advances both *pos and s.pos by the number of bytes the filter
 * reports as processed.
 */
func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
	tail := buf[*pos:]

	var filter func(*xzDecBCJ, []byte) int
	switch s.typ {
	case idBCJX86:
		filter = bcjX86Filter
	case idBCJPowerPC:
		filter = bcjPowerPCFilter
	case idBCJIA64:
		filter = bcjIA64Filter
	case idBCJARM:
		filter = bcjARMFilter
	case idBCJARMThumb:
		filter = bcjARMThumbFilter
	case idBCJSPARC:
		filter = bcjSPARCFilter
	default:
		/* Never reached; nothing is filtered and no position moves */
		return
	}

	n := filter(s, tail)
	*pos += n
	s.pos += n
}
/*
 * bcjFlush moves as much of the already filtered data at the start of
 * temp as fits into the free space of the output buffer. Whatever is
 * left in temp, filtered or not, is shifted to the beginning of temp
 * and temp is shortened accordingly.
 */
func bcjFlush(s *xzDecBCJ, b *xzBuf) {
	/* Limited by both the filtered amount and the room left in b.out */
	n := s.temp.filtered
	if room := len(b.out) - b.outPos; room < n {
		n = room
	}

	copy(b.out[b.outPos:], s.temp.buf[:n])
	b.outPos += n
	s.temp.filtered -= n

	/* Close the gap; copy handles the overlapping ranges */
	rest := s.temp.buf[n:]
	copy(s.temp.buf, rest)
	s.temp.buf = s.temp.buf[:len(rest)]
}
/*
 * Decode raw stream which has a BCJ filter as the first filter.
 *
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function
 * does some buffering.
 *
 * s is the BCJ decoder state, b is the caller's buffer whose output
 * side is filled with filtered data, and chain runs the next filter in
 * the chain on a buffer.
 *
 * Any value other than xzOK or xzStreamEnd returned by chain is
 * returned as is. xzOK is returned while filtered data is still pending
 * in temp or more data is needed. xzStreamEnd is returned once the next
 * filter has finished and all data has been flushed to the output.
 */
func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
/* Offset in b.out where the not yet filtered data begins */
var outStart int
/*
 * Flush pending already filtered data to the output buffer. Return
 * immediately if we couldn't flush everything, or if the next
 * filter in the chain had already returned xzStreamEnd.
 */
if s.temp.filtered > 0 {
bcjFlush(s, b)
if s.temp.filtered > 0 {
return xzOK
}
if s.ret == xzStreamEnd {
return xzStreamEnd
}
}
/*
 * If we have more output space than what is currently pending in
 * temp, copy the unfiltered data from temp to the output buffer
 * and try to fill the output buffer by decoding more data from the
 * next filter in the chain. Apply the BCJ filter on the new data
 * in the output buffer. If everything cannot be filtered, copy it
 * to temp and rewind the output buffer position accordingly.
 *
 * This needs to be always run when len(temp.buf) == 0 to handle a special
 * case where the output buffer is full and the next filter has no
 * more output coming but hasn't returned xzStreamEnd yet.
 */
if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
outStart = b.outPos
copy(b.out[b.outPos:], s.temp.buf)
b.outPos += len(s.temp.buf)
s.ret = chain(b)
if s.ret != xzStreamEnd && s.ret != xzOK {
return s.ret
}
/* Filter in place; outStart is advanced past the filtered bytes */
bcjApply(s, b.out[:b.outPos], &outStart)
/*
 * As an exception, if the next filter returned xzStreamEnd,
 * we can do that too, since the last few bytes that remain
 * unfiltered are meant to remain unfiltered.
 */
if s.ret == xzStreamEnd {
return xzStreamEnd
}
/* Move the unfiltered tail of the output into temp */
s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
b.outPos -= len(s.temp.buf)
copy(s.temp.buf, b.out[b.outPos:])
/*
 * If there wasn't enough input to the next filter to fill
 * the output buffer with unfiltered data, there's no point
 * to try decoding more data to temp.
 */
if b.outPos+len(s.temp.buf) < len(b.out) {
return xzOK
}
}
/*
 * We have unfiltered data in temp. If the output buffer isn't full
 * yet, try to fill the temp buffer by decoding more data from the
 * next filter. Apply the BCJ filter on temp. Then we hopefully can
 * fill the actual output buffer by copying filtered data from temp.
 * A mix of filtered and unfiltered data may be left in temp; it will
 * be taken care of on the next call to this function.
 */
if b.outPos < len(b.out) {
/* Make b.out temporarily point to s.temp. */
s.out = b.out
s.outPos = b.outPos
b.out = s.temp.bufArray[:]
b.outPos = len(s.temp.buf)
s.ret = chain(b)
/* temp now holds everything up to where the next filter stopped */
s.temp.buf = s.temp.bufArray[:b.outPos]
/* Restore the caller's output buffer before doing anything else */
b.out = s.out
b.outPos = s.outPos
if s.ret != xzOK && s.ret != xzStreamEnd {
return s.ret
}
bcjApply(s, s.temp.buf, &s.temp.filtered)
/*
 * If the next filter returned xzStreamEnd, we mark that
 * everything is filtered, since the last unfiltered bytes
 * of the stream are meant to be left as is.
 */
if s.ret == xzStreamEnd {
s.temp.filtered = len(s.temp.buf)
}
bcjFlush(s, b)
/* Filtered data that did not fit is delivered on the next call */
if s.temp.filtered > 0 {
return xzOK
}
}
return s.ret
}
/*
 * xzDecBCJCreate allocates a zero-valued BCJ decoder. The result has to
 * be set up with xzDecBCJReset before it is passed to xzDecBCJRun.
 */
func xzDecBCJCreate() *xzDecBCJ {
	return &xzDecBCJ{}
}
/*
 * xzDecBCJReset prepares s for decoding with the BCJ filter identified
 * by id, starting at the given offset.
 *
 * xzOptionsError is returned, and s is left untouched, when id is not a
 * supported BCJ Filter ID or when offset is not a multiple of the
 * alignment of that filter. Otherwise the decoder state is reset and
 * xzOK is returned.
 */
func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
	/* Alignment that the start offset has to be a multiple of */
	var align int
	switch id {
	case idBCJX86:
		align = 1
	case idBCJARMThumb:
		align = 2
	case idBCJPowerPC, idBCJARM, idBCJSPARC:
		align = 4
	case idBCJIA64:
		align = 16
	default:
		/* Unsupported Filter ID */
		return xzOptionsError
	}
	if offset%align != 0 {
		return xzOptionsError
	}

	s.typ = id
	s.ret = xzOK
	s.pos = offset
	s.x86PrevMask = 0
	s.temp.filtered = 0
	s.temp.buf = nil
	return xzOK
}