githaven-fork/vendor/github.com/xi2/xz/dec_bcj.go

/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * Translation to Go: Michael Cross <https://github.com/xi2>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package xz

/* from linux/lib/xz/xz_dec_bcj.c *************************************/

/*
 * xzDecBCJ holds the state of one BCJ filter decoder: the selected
 * filter type, the running position in the uncompressed data, and a
 * small staging buffer for bytes that cannot be handed out yet.
 */
type xzDecBCJ struct {
	/* Type of the BCJ filter being used; selects the filter in bcjApply */
	typ xzFilterID
	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned xzStreamEnd
	 */
	ret xzRet
	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). It is set to the start offset by
	 * xzDecBCJReset and advanced by bcjApply.
	 */
	pos int
	/*
	 * x86 filter state: the mask saved by bcjX86Filter at the end of a
	 * call and reloaded at the start of the next one.
	 */
	x86PrevMask uint32
	/*
	 * Temporary space to hold the variables from xzBuf while
	 * xzDecBCJRun points the xzBuf output at temp.bufArray.
	 */
	out    []byte
	outPos int
	temp   struct {
		/* Amount of already filtered data in the beginning of buf */
		filtered int
		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		buf      []byte // slice buf will be backed by bufArray
		bufArray [16]byte
	}
}

/*
 * bcjX86TestMSByte reports whether b is 0x00 or 0xff. The x86 filter
 * uses it on the most significant byte of a memory address in an x86
 * instruction.
 */
func bcjX86TestMSByte(b byte) bool {
	switch b {
	case 0x00, 0xff:
		return true
	}
	return false
}

/*
 * bcjX86Filter applies the x86 BCJ decoder to buf in place.
 *
 * It looks for opcode bytes 0xE8 and 0xE9 and, when the following
 * 32-bit little-endian operand passes the most-significant-byte tests,
 * rewrites the operand by subtracting the position of the byte after
 * the instruction (s.pos + i + 5).
 *
 * The last four bytes of buf are never examined as opcodes, so a
 * complete operand is always available. The return value is the
 * number of bytes processed; it is 0 when buf holds four bytes or
 * fewer. The mask needed to continue on the next call is stored in
 * s.x86PrevMask. s.pos is only read here, never updated.
 */
func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
	/*
	 * Indexed by prevMask (0-7). A false entry forces the current
	 * opcode byte to be skipped without conversion.
	 */
	var maskToAllowedStatus = []bool{
		true, true, true, false, true, false, false, false,
	}
	/*
	 * Indexed by prevMask (0-7). Selects which operand byte is tested:
	 * as a byte offset back from buf[i+4], and times 8 as a bit offset
	 * within dest.
	 */
	var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
	var i int
	/* Index of the previous opcode byte; -1 means none seen in this call */
	var prevPos int = -1
	/* Bit history of recently seen opcode bytes that were not converted */
	var prevMask uint32 = s.x86PrevMask
	var src uint32
	var dest uint32
	var j uint32
	var b byte
	/* An opcode byte plus its four operand bytes are needed */
	if len(buf) <= 4 {
		return 0
	}
	for i = 0; i < len(buf)-4; i++ {
		/* Only 0xE8 and 0xE9 are candidates */
		if buf[i]&0xfe != 0xe8 {
			continue
		}
		/* prevPos temporarily holds the distance to the previous opcode byte */
		prevPos = i - prevPos
		if prevPos > 3 {
			/* Too far away for the history to matter */
			prevMask = 0
		} else {
			/* Age the history by the distance and keep three bits */
			prevMask = (prevMask << (uint(prevPos) - 1)) & 7
			if prevMask != 0 {
				b = buf[i+4-int(maskToBitNum[prevMask])]
				if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
					/* Skip this opcode byte but record it in the history */
					prevPos = i
					prevMask = prevMask<<1 | 1
					continue
				}
			}
		}
		prevPos = i
		/* Convert only when the operand's top byte is 0x00 or 0xff */
		if bcjX86TestMSByte(buf[i+4]) {
			src = getLE32(buf[i+1:])
			for {
				dest = src - uint32(s.pos+i+5)
				if prevMask == 0 {
					break
				}
				j = uint32(maskToBitNum[prevMask]) * 8
				b = byte(dest >> (24 - j))
				if !bcjX86TestMSByte(b) {
					break
				}
				/* Flip the low 32-j bits and convert again */
				src = dest ^ (1<<(32-j) - 1)
			}
			/* Keep the low 25 bits and sign-extend bit 24 upwards */
			dest &= 0x01FFFFFF
			dest |= 0 - dest&0x01000000
			putLE32(dest, buf[i+1:])
			/* Step over the four operand bytes just rewritten */
			i += 4
		} else {
			prevMask = prevMask<<1 | 1
		}
	}
	/* Save the history, aged by the distance to the end, for the next call */
	prevPos = i - prevPos
	if prevPos > 3 {
		s.x86PrevMask = 0
	} else {
		s.x86PrevMask = prevMask << (uint(prevPos) - 1)
	}
	return i
}

/*
 * bcjPowerPCFilter applies the PowerPC BCJ decoder to buf in place.
 *
 * buf is read as big-endian 32-bit words. For every word matching
 * 0x48000001 under the mask 0xFC000003, the address field (mask
 * 0x03FFFFFC) has the word's position (s.pos + offset) subtracted
 * from it. Returns the number of bytes processed, a multiple of four.
 */
func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
	const (
		opcodeMask = 0xFC000003
		opcodeBits = 0x48000001
		addrMask   = 0x03FFFFFC
	)
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		if word&opcodeMask != opcodeBits {
			continue
		}
		addr := (word & addrMask) - uint32(s.pos+off)
		putBE32((addr&addrMask)|opcodeBits, buf[off:])
	}
	return off
}

/*
 * bcjIA64BranchTable is indexed by the low five bits of the first byte
 * of a 16-byte chunk. Bit n of an entry is set when instruction slot n
 * of that chunk is to be examined by bcjIA64Filter. Indexes that are
 * not listed are zero, so no slot is examined for them.
 */
var bcjIA64BranchTable = [32]byte{
	0x10: 4, 0x11: 4, 0x12: 6, 0x13: 6,
	0x16: 7, 0x17: 7,
	0x18: 4, 0x19: 4,
	0x1c: 4, 0x1d: 4,
}

/*
 * bcjIA64Filter applies the IA-64 BCJ decoder to buf in place.
 *
 * buf is processed in 16-byte chunks. The low five bits of the first
 * byte of a chunk index bcjIA64BranchTable, giving a bitmask of the
 * instruction slots to examine. For each selected slot, six bytes
 * covering the 41-bit instruction are loaded; if the instruction
 * matches the tested bit pattern, its 21-bit address field (in units
 * of 16 bytes) has the chunk position (s.pos + i) subtracted from it
 * and the instruction is stored back.
 *
 * Returns the number of bytes processed, a multiple of 16.
 */
func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
	var branchTable = bcjIA64BranchTable[:]
	/*
	 * The local variables take a little bit stack space, but it's less
	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
	 * stack usage here without doing that for the LZMA2 decoder too.
	 */
	/* Loop counters */
	var i int
	var j int
	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	var slot uint32
	/* Bitwise offset of the instruction indicated by slot */
	var bitPos uint32
	/* bitPos split into byte and bit parts */
	var bytePos uint32
	var bitRes uint32
	/* Address part of an instruction */
	var addr uint32
	/* Mask used to detect which instructions to convert */
	var mask uint32
	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	var instr uint64
	/* Instruction normalized with bitRes for easier manipulation */
	var norm uint64
	for i = 0; i+16 <= len(buf); i += 16 {
		mask = uint32(branchTable[buf[i]&0x1f])
		/* Slots start at bit 5 of the chunk and are 41 bits apart */
		for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {
			if (mask>>slot)&1 == 0 {
				continue
			}
			bytePos = bitPos >> 3
			bitRes = bitPos & 7
			/* Load six bytes, least significant byte first */
			instr = 0
			for j = 0; j < 6; j++ {
				instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))
			}
			norm = instr >> bitRes
			if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {
				/* Gather the address: 20 bits from bit 13 plus bit 36 on top */
				addr = uint32((norm >> 13) & 0x0fffff)
				addr |= (uint32(norm>>36) & 1) << 20
				/* Scale to bytes, subtract the position, scale back */
				addr <<= 4
				addr -= uint32(s.pos + i)
				addr >>= 4
				/* Clear the old address bits and insert the new ones */
				norm &= ^(uint64(0x8fffff) << 13)
				norm |= uint64(addr&0x0fffff) << 13
				norm |= uint64(addr&0x100000) << (36 - 20)
				/* Keep the bits below the instruction, then merge it back */
				instr &= 1<<bitRes - 1
				instr |= norm << bitRes
				for j = 0; j < 6; j++ {
					buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))
				}
			}
		}
	}
	return i
}

/*
 * bcjARMFilter applies the ARM BCJ decoder to buf in place.
 *
 * buf is processed in 4-byte words. A word whose last byte is 0xeb has
 * its 24-bit little-endian operand converted: the operand is scaled by
 * four, the instruction position plus eight (s.pos + offset + 8) is
 * subtracted, and the result is scaled back and stored. Returns the
 * number of bytes processed, a multiple of four.
 */
func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := buf[off : off+4]
		if word[3] != 0xeb {
			continue
		}
		target := uint32(word[0]) | uint32(word[1])<<8 | uint32(word[2])<<16
		target = ((target << 2) - uint32(s.pos+off+8)) >> 2
		word[0], word[1], word[2] = byte(target), byte(target>>8), byte(target>>16)
	}
	return off
}

/*
 * bcjARMThumbFilter applies the ARM-Thumb BCJ decoder to buf in place.
 *
 * buf is scanned in 2-byte steps with four bytes in view. When the
 * second byte matches 0xf0 and the fourth matches 0xf8 (both under
 * mask 0xf8), the 22-bit operand spread over the four bytes is
 * converted: it is scaled by two, the instruction position plus four
 * (s.pos + offset + 4) is subtracted, and the result is scaled back
 * and stored. A converted instruction advances the scan by four bytes.
 * Returns the number of bytes processed.
 */
func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for off+4 <= len(buf) {
		pair := buf[off : off+4]
		if pair[1]&0xf8 != 0xf0 || pair[3]&0xf8 != 0xf8 {
			off += 2
			continue
		}
		target := uint32(pair[1]&0x07)<<19 |
			uint32(pair[0])<<11 |
			uint32(pair[3]&0x07)<<8 |
			uint32(pair[2])
		target = ((target << 1) - uint32(s.pos+off+4)) >> 1
		pair[0] = byte(target >> 11)
		pair[1] = byte(0xf0 | (target>>19)&0x07)
		pair[2] = byte(target)
		pair[3] = byte(0xf8 | (target>>8)&0x07)
		off += 4
	}
	return off
}

/*
 * bcjSPARCFilter applies the SPARC BCJ decoder to buf in place.
 *
 * buf is read as big-endian 32-bit words. A word whose top ten bits
 * are 0x100 or 0x1ff is converted: the word is scaled by four, its
 * position (s.pos + offset) is subtracted, and the result is scaled
 * back. The stored word keeps the low 22 bits of that result, extends
 * bit 22 into the bits above it, and has bit 30 set. Returns the
 * number of bytes processed, a multiple of four.
 */
func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		switch word >> 22 {
		case 0x100, 0x1ff:
			disp := ((word << 2) - uint32(s.pos+off)) >> 2
			upper := 0x40000000 - (disp & 0x400000)
			putBE32(upper|0x40000000|(disp&0x3FFFFF), buf[off:])
		}
	}
	return off
}

/*
 * bcjApply runs the filter selected by s.typ over buf[*pos:] and then
 * advances both *pos and s.pos by the number of bytes the filter
 * reported as processed. If s.typ matches none of the known filters,
 * nothing is processed and both positions are left unchanged.
 */
func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
	pending := buf[*pos:]
	var filter func(*xzDecBCJ, []byte) int
	switch s.typ {
	case idBCJX86:
		filter = bcjX86Filter
	case idBCJPowerPC:
		filter = bcjPowerPCFilter
	case idBCJIA64:
		filter = bcjIA64Filter
	case idBCJARM:
		filter = bcjARMFilter
	case idBCJARMThumb:
		filter = bcjARMThumbFilter
	case idBCJSPARC:
		filter = bcjSPARCFilter
	}
	done := 0
	if filter != nil {
		done = filter(s, pending)
	}
	*pos += done
	s.pos += done
}

/*
 * bcjFlush moves as much already filtered data as fits from the start
 * of s.temp.buf into the free space of b.out, advancing b.outPos and
 * reducing s.temp.filtered by the amount moved. Whatever remains in
 * temp (filtered or not) is shifted to the beginning of s.temp.buf,
 * which is shortened accordingly.
 */
func bcjFlush(s *xzDecBCJ, b *xzBuf) {
	n := s.temp.filtered
	if room := len(b.out) - b.outPos; room < n {
		n = room
	}
	copy(b.out[b.outPos:], s.temp.buf[:n])
	b.outPos += n
	s.temp.filtered -= n
	rest := s.temp.buf[n:]
	copy(s.temp.buf, rest)
	s.temp.buf = s.temp.buf[:len(rest)]
}

/*
 * Decode raw stream which has a BCJ filter as the first filter.
 *
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 *
 * s is the BCJ decoder state, b is the caller's buffer whose out and
 * outPos fields receive the filtered data, and chain is called to
 * decode more data from the next filter into b.
 *
 * Returns xzOK when filtered data is still pending in temp or when
 * more calls are needed, xzStreamEnd once the next filter has finished
 * and everything has been flushed, or any other value returned by
 * chain unchanged.
 */
func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
	/* Value of b.outPos before this call's new data; advanced by bcjApply */
	var outStart int
	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned xzStreamEnd.
	 */
	if s.temp.filtered > 0 {
		bcjFlush(s, b)
		if s.temp.filtered > 0 {
			return xzOK
		}
		if s.ret == xzStreamEnd {
			return xzStreamEnd
		}
	}
	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when len(temp.buf) == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned xzStreamEnd yet.
	 */
	if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
		outStart = b.outPos
		copy(b.out[b.outPos:], s.temp.buf)
		b.outPos += len(s.temp.buf)
		s.ret = chain(b)
		if s.ret != xzStreamEnd && s.ret != xzOK {
			return s.ret
		}
		bcjApply(s, b.out[:b.outPos], &outStart)
		/*
		 * As an exception, if the next filter returned xzStreamEnd,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if s.ret == xzStreamEnd {
			return xzStreamEnd
		}
		/* Move the unfiltered tail b.out[outStart:b.outPos] back into temp */
		s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
		b.outPos -= len(s.temp.buf)
		copy(s.temp.buf, b.out[b.outPos:])
		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if b.outPos+len(s.temp.buf) < len(b.out) {
			return xzOK
		}
	}
	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if b.outPos < len(b.out) {
		/* Make b.out temporarily point to s.temp. */
		s.out = b.out
		s.outPos = b.outPos
		b.out = s.temp.bufArray[:]
		b.outPos = len(s.temp.buf)
		s.ret = chain(b)
		/* temp now holds everything up to where the next filter stopped */
		s.temp.buf = s.temp.bufArray[:b.outPos]
		/* Restore the caller's output buffer before any return */
		b.out = s.out
		b.outPos = s.outPos
		if s.ret != xzOK && s.ret != xzStreamEnd {
			return s.ret
		}
		bcjApply(s, s.temp.buf, &s.temp.filtered)
		/*
		 * If the next filter returned xzStreamEnd, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if s.ret == xzStreamEnd {
			s.temp.filtered = len(s.temp.buf)
		}
		bcjFlush(s, b)
		if s.temp.filtered > 0 {
			return xzOK
		}
	}
	return s.ret
}

/*
 * xzDecBCJCreate returns a new, zero-valued BCJ decoder state.
 * xzDecBCJReset must be used on it before calling xzDecBCJRun.
 */
func xzDecBCJCreate() *xzDecBCJ {
	return &xzDecBCJ{}
}

/*
 * xzDecBCJReset prepares s for decoding with the BCJ filter named by
 * id, starting at the given offset.
 *
 * Returns xzOptionsError, leaving s untouched, when id is not one of
 * the supported BCJ Filter IDs or when offset is not a multiple of the
 * filter's alignment (PowerPC, ARM and SPARC: 4; IA-64: 16; ARM-Thumb:
 * 2; x86 accepts any offset). Otherwise the decoder state is reset and
 * xzOK is returned.
 */
func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
	var alignment int
	switch id {
	case idBCJX86:
		alignment = 1
	case idBCJARMThumb:
		alignment = 2
	case idBCJPowerPC, idBCJARM, idBCJSPARC:
		alignment = 4
	case idBCJIA64:
		alignment = 16
	default:
		/* Unsupported Filter ID */
		return xzOptionsError
	}
	/* The start offset must be a multiple of the alignment */
	if offset%alignment != 0 {
		return xzOptionsError
	}
	s.typ = id
	s.ret = xzOK
	s.pos = offset
	s.x86PrevMask = 0
	s.temp.filtered = 0
	s.temp.buf = nil
	return xzOK
}
Dump: add output format tar and output to stdout (#10376) * Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net> 2020-06-05 20:47:39 +00:00			`/*`
			`* Branch/Call/Jump (BCJ) filter decoders`
			`*`
			`* Authors: Lasse Collin <lasse.collin@tukaani.org>`
			`* Igor Pavlov <http://7-zip.org/>`
			`*`
			`* Translation to Go: Michael Cross <https://github.com/xi2>`
			`*`
			`* This file has been put into the public domain.`
			`* You can do whatever you want with this file.`
			`*/`

			`package xz`

			`/* from linux/lib/xz/xz_dec_bcj.c *************************************/`

			`type xzDecBCJ struct {`
			`/* Type of the BCJ filter being used */`
			`typ xzFilterID`
			`/*`
			`* Return value of the next filter in the chain. We need to preserve`
			`* this information across calls, because we must not call the next`
			`* filter anymore once it has returned xzStreamEnd`
			`*/`
			`ret xzRet`
			`/*`
			`* Absolute position relative to the beginning of the uncompressed`
			`* data (in a single .xz Block).`
			`*/`
			`pos int`
			`/* x86 filter state */`
			`x86PrevMask uint32`
			`/* Temporary space to hold the variables from xzBuf */`
			`out []byte`
			`outPos int`
			`temp struct {`
			`/* Amount of already filtered data in the beginning of buf */`
			`filtered int`
			`/*`
			`* Buffer to hold a mix of filtered and unfiltered data. This`
			`* needs to be big enough to hold Alignment + 2 * Look-ahead:`
			`*`
			`* Type Alignment Look-ahead`
			`* x86 1 4`
			`* PowerPC 4 0`
			`* IA-64 16 0`
			`* ARM 4 0`
			`* ARM-Thumb 2 2`
			`* SPARC 4 0`
			`*/`
			`buf []byte // slice buf will be backed by bufArray`
			`bufArray [16]byte`
			`}`
			`}`

			`/*`
			`* This is used to test the most significant byte of a memory address`
			`* in an x86 instruction.`
			`*/`
			`func bcjX86TestMSByte(b byte) bool {`
			`return b == 0x00 \|\| b == 0xff`
			`}`

			`func bcjX86Filter(s *xzDecBCJ, buf []byte) int {`
			`var maskToAllowedStatus = []bool{`
			`true, true, true, false, true, false, false, false,`
			`}`
			`var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}`
			`var i int`
			`var prevPos int = -1`
			`var prevMask uint32 = s.x86PrevMask`
			`var src uint32`
			`var dest uint32`
			`var j uint32`
			`var b byte`
			`if len(buf) <= 4 {`
			`return 0`
			`}`
			`for i = 0; i < len(buf)-4; i++ {`
			`if buf[i]&0xfe != 0xe8 {`
			`continue`
			`}`
			`prevPos = i - prevPos`
			`if prevPos > 3 {`
			`prevMask = 0`
			`} else {`
			`prevMask = (prevMask << (uint(prevPos) - 1)) & 7`
			`if prevMask != 0 {`
			`b = buf[i+4-int(maskToBitNum[prevMask])]`
			`if !maskToAllowedStatus[prevMask] \|\| bcjX86TestMSByte(b) {`
			`prevPos = i`
			`prevMask = prevMask<<1 \| 1`
			`continue`
			`}`
			`}`
			`}`
			`prevPos = i`
			`if bcjX86TestMSByte(buf[i+4]) {`
			`src = getLE32(buf[i+1:])`
			`for {`
			`dest = src - uint32(s.pos+i+5)`
			`if prevMask == 0 {`
			`break`
			`}`
			`j = uint32(maskToBitNum[prevMask]) * 8`
			`b = byte(dest >> (24 - j))`
			`if !bcjX86TestMSByte(b) {`
			`break`
			`}`
			`src = dest ^ (1<<(32-j) - 1)`
			`}`
			`dest &= 0x01FFFFFF`
			`dest \|= 0 - dest&0x01000000`
			`putLE32(dest, buf[i+1:])`
			`i += 4`
			`} else {`
			`prevMask = prevMask<<1 \| 1`
			`}`
			`}`
			`prevPos = i - prevPos`
			`if prevPos > 3 {`
			`s.x86PrevMask = 0`
			`} else {`
			`s.x86PrevMask = prevMask << (uint(prevPos) - 1)`
			`}`
			`return i`
			`}`

			`func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {`
			`var i int`
			`var instr uint32`
			`for i = 0; i+4 <= len(buf); i += 4 {`
			`instr = getBE32(buf[i:])`
			`if instr&0xFC000003 == 0x48000001 {`
			`instr &= 0x03FFFFFC`
			`instr -= uint32(s.pos + i)`
			`instr &= 0x03FFFFFC`
			`instr \|= 0x48000001`
			`putBE32(instr, buf[i:])`
			`}`
			`}`
			`return i`
			`}`

			`var bcjIA64BranchTable = [...]byte{`
			`0, 0, 0, 0, 0, 0, 0, 0,`
			`0, 0, 0, 0, 0, 0, 0, 0,`
			`4, 4, 6, 6, 0, 0, 7, 7,`
			`4, 4, 0, 0, 4, 4, 0, 0,`
			`}`

			`func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {`
			`var branchTable = bcjIA64BranchTable[:]`
			`/*`
			`* The local variables take a little bit stack space, but it's less`
			`* than what LZMA2 decoder takes, so it doesn't make sense to reduce`
			`* stack usage here without doing that for the LZMA2 decoder too.`
			`*/`
			`/* Loop counters */`
			`var i int`
			`var j int`
			`/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */`
			`var slot uint32`
			`/* Bitwise offset of the instruction indicated by slot */`
			`var bitPos uint32`
			`/* bit_pos split into byte and bit parts */`
			`var bytePos uint32`
			`var bitRes uint32`
			`/* Address part of an instruction */`
			`var addr uint32`
			`/* Mask used to detect which instructions to convert */`
			`var mask uint32`
			`/* 41-bit instruction stored somewhere in the lowest 48 bits */`
			`var instr uint64`
			`/* Instruction normalized with bit_res for easier manipulation */`
			`var norm uint64`
			`for i = 0; i+16 <= len(buf); i += 16 {`
			`mask = uint32(branchTable[buf[i]&0x1f])`
			`for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {`
			`if (mask>>slot)&1 == 0 {`
			`continue`
			`}`
			`bytePos = bitPos >> 3`
			`bitRes = bitPos & 7`
			`instr = 0`
			`for j = 0; j < 6; j++ {`
			`instr \|= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))`
			`}`
			`norm = instr >> bitRes`
			`if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {`
			`addr = uint32((norm >> 13) & 0x0fffff)`
			`addr \|= (uint32(norm>>36) & 1) << 20`
			`addr <<= 4`
			`addr -= uint32(s.pos + i)`
			`addr >>= 4`
			`norm &= ^(uint64(0x8fffff) << 13)`
			`norm \|= uint64(addr&0x0fffff) << 13`
			`norm \|= uint64(addr&0x100000) << (36 - 20)`
			`instr &= 1<<bitRes - 1`
			`instr \|= norm << bitRes`
			`for j = 0; j < 6; j++ {`
			`buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))`
			`}`
			`}`
			`}`
			`}`
			`return i`
			`}`

			`func bcjARMFilter(s *xzDecBCJ, buf []byte) int {`
			`var i int`
			`var addr uint32`
			`for i = 0; i+4 <= len(buf); i += 4 {`
			`if buf[i+3] == 0xeb {`
			`addr = uint32(buf[i]) \| uint32(buf[i+1])<<8 \|`
			`uint32(buf[i+2])<<16`
			`addr <<= 2`
			`addr -= uint32(s.pos + i + 8)`
			`addr >>= 2`
			`buf[i] = byte(addr)`
			`buf[i+1] = byte(addr >> 8)`
			`buf[i+2] = byte(addr >> 16)`
			`}`
			`}`
			`return i`
			`}`

			`func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {`
			`var i int`
			`var addr uint32`
			`for i = 0; i+4 <= len(buf); i += 2 {`
			`if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 {`
			`addr = uint32(buf[i+1]&0x07)<<19 \|`
			`uint32(buf[i])<<11 \|`
			`uint32(buf[i+3]&0x07)<<8 \|`
			`uint32(buf[i+2])`
			`addr <<= 1`
			`addr -= uint32(s.pos + i + 4)`
			`addr >>= 1`
			`buf[i+1] = byte(0xf0 \| (addr>>19)&0x07)`
			`buf[i] = byte(addr >> 11)`
			`buf[i+3] = byte(0xf8 \| (addr>>8)&0x07)`
			`buf[i+2] = byte(addr)`
			`i += 2`
			`}`
			`}`
			`return i`
			`}`

			`func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {`
			`var i int`
			`var instr uint32`
			`for i = 0; i+4 <= len(buf); i += 4 {`
			`instr = getBE32(buf[i:])`
			`if instr>>22 == 0x100 \|\| instr>>22 == 0x1ff {`
			`instr <<= 2`
			`instr -= uint32(s.pos + i)`
			`instr >>= 2`
			`instr = (0x40000000 - instr&0x400000) \|`
			`0x40000000 \| (instr & 0x3FFFFF)`
			`putBE32(instr, buf[i:])`
			`}`
			`}`
			`return i`
			`}`

			`/*`
			`* Apply the selected BCJ filter. Update *pos and s.pos to match the amount`
			`* of data that got filtered.`
			`*/`
			`func bcjApply(s xzDecBCJ, buf []byte, pos int) {`
			`var filtered int`
			`buf = buf[*pos:]`
			`switch s.typ {`
			`case idBCJX86:`
			`filtered = bcjX86Filter(s, buf)`
			`case idBCJPowerPC:`
			`filtered = bcjPowerPCFilter(s, buf)`
			`case idBCJIA64:`
			`filtered = bcjIA64Filter(s, buf)`
			`case idBCJARM:`
			`filtered = bcjARMFilter(s, buf)`
			`case idBCJARMThumb:`
			`filtered = bcjARMThumbFilter(s, buf)`
			`case idBCJSPARC:`
			`filtered = bcjSPARCFilter(s, buf)`
			`default:`
			`/* Never reached */`
			`}`
			`*pos += filtered`
			`s.pos += filtered`
			`}`

			`/*`
			`* Flush pending filtered data from temp to the output buffer.`
			`* Move the remaining mixture of possibly filtered and unfiltered`
			`* data to the beginning of temp.`
			`*/`
			`func bcjFlush(s xzDecBCJ, b xzBuf) {`
			`var copySize int`
			`copySize = len(b.out) - b.outPos`
			`if copySize > s.temp.filtered {`
			`copySize = s.temp.filtered`
			`}`
			`copy(b.out[b.outPos:], s.temp.buf[:copySize])`
			`b.outPos += copySize`
			`s.temp.filtered -= copySize`
			`copy(s.temp.buf, s.temp.buf[copySize:])`
			`s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize]`
			`}`

			`/*`
			`* Decode raw stream which has a BCJ filter as the first filter.`
			`*`
			`* The BCJ filter functions are primitive in sense that they process the`
			`* data in chunks of 1-16 bytes. To hide this issue, this function does`
			`* some buffering.`
			`*/`
			`func xzDecBCJRun(s xzDecBCJ, b xzBuf, chain func(*xzBuf) xzRet) xzRet {`
			`var outStart int`
			`/*`
			`* Flush pending already filtered data to the output buffer. Return`
			`* immediately if we couldn't flush everything, or if the next`
			`* filter in the chain had already returned xzStreamEnd.`
			`*/`
			`if s.temp.filtered > 0 {`
			`bcjFlush(s, b)`
			`if s.temp.filtered > 0 {`
			`return xzOK`
			`}`
			`if s.ret == xzStreamEnd {`
			`return xzStreamEnd`
			`}`
			`}`
			`/*`
			`* If we have more output space than what is currently pending in`
			`* temp, copy the unfiltered data from temp to the output buffer`
			`* and try to fill the output buffer by decoding more data from the`
			`* next filter in the chain. Apply the BCJ filter on the new data`
			`* in the output buffer. If everything cannot be filtered, copy it`
			`* to temp and rewind the output buffer position accordingly.`
			`*`
			`* This needs to be always run when len(temp.buf) == 0 to handle a special`
			`* case where the output buffer is full and the next filter has no`
			`* more output coming but hasn't returned xzStreamEnd yet.`
			`*/`
			`if len(s.temp.buf) < len(b.out)-b.outPos \|\| len(s.temp.buf) == 0 {`
			`outStart = b.outPos`
			`copy(b.out[b.outPos:], s.temp.buf)`
			`b.outPos += len(s.temp.buf)`
			`s.ret = chain(b)`
			`if s.ret != xzStreamEnd && s.ret != xzOK {`
			`return s.ret`
			`}`
			`bcjApply(s, b.out[:b.outPos], &outStart)`
			`/*`
			`* As an exception, if the next filter returned xzStreamEnd,`
			`* we can do that too, since the last few bytes that remain`
			`* unfiltered are meant to remain unfiltered.`
			`*/`
			`if s.ret == xzStreamEnd {`
			`return xzStreamEnd`
			`}`
			`s.temp.buf = s.temp.bufArray[:b.outPos-outStart]`
			`b.outPos -= len(s.temp.buf)`
			`copy(s.temp.buf, b.out[b.outPos:])`
			`/*`
			`* If there wasn't enough input to the next filter to fill`
			`* the output buffer with unfiltered data, there's no point`
			`* to try decoding more data to temp.`
			`*/`
			`if b.outPos+len(s.temp.buf) < len(b.out) {`
			`return xzOK`
			`}`
			`}`
			`/*`
			`* We have unfiltered data in temp. If the output buffer isn't full`
			`* yet, try to fill the temp buffer by decoding more data from the`
			`* next filter. Apply the BCJ filter on temp. Then we hopefully can`
			`* fill the actual output buffer by copying filtered data from temp.`
			`* A mix of filtered and unfiltered data may be left in temp; it will`
			`* be taken care on the next call to this function.`
			`*/`
			`if b.outPos < len(b.out) {`
			`/* Make b.out temporarily point to s.temp. */`
			`s.out = b.out`
			`s.outPos = b.outPos`
			`b.out = s.temp.bufArray[:]`
			`b.outPos = len(s.temp.buf)`
			`s.ret = chain(b)`
			`s.temp.buf = s.temp.bufArray[:b.outPos]`
			`b.out = s.out`
			`b.outPos = s.outPos`
			`if s.ret != xzOK && s.ret != xzStreamEnd {`
			`return s.ret`
			`}`
			`bcjApply(s, s.temp.buf, &s.temp.filtered)`
			`/*`
			`* If the next filter returned xzStreamEnd, we mark that`
			`* everything is filtered, since the last unfiltered bytes`
			`* of the stream are meant to be left as is.`
			`*/`
			`if s.ret == xzStreamEnd {`
			`s.temp.filtered = len(s.temp.buf)`
			`}`
			`bcjFlush(s, b)`
			`if s.temp.filtered > 0 {`
			`return xzOK`
			`}`
			`}`
			`return s.ret`
			`}`

			`/*`
			`* Allocate memory for BCJ decoders. xzDecBCJReset must be used before`
			`* calling xzDecBCJRun.`
			`*/`
			`func xzDecBCJCreate() *xzDecBCJ {`
			`return new(xzDecBCJ)`
			`}`

			`/*`
			`* Decode the Filter ID of a BCJ filter and check the start offset is`
			`* valid. Returns xzOK if the given Filter ID and offset is`
			`* supported. Otherwise xzOptionsError is returned.`
			`*/`
			`func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {`
			`switch id {`
			`case idBCJX86:`
			`case idBCJPowerPC:`
			`case idBCJIA64:`
			`case idBCJARM:`
			`case idBCJARMThumb:`
			`case idBCJSPARC:`
			`default:`
			`/* Unsupported Filter ID */`
			`return xzOptionsError`
			`}`
			`// check offset is a multiple of alignment`
			`switch id {`
			`case idBCJPowerPC, idBCJARM, idBCJSPARC:`
			`if offset%4 != 0 {`
			`return xzOptionsError`
			`}`
			`case idBCJIA64:`
			`if offset%16 != 0 {`
			`return xzOptionsError`
			`}`
			`case idBCJARMThumb:`
			`if offset%2 != 0 {`
			`return xzOptionsError`
			`}`
			`}`
			`s.typ = id`
			`s.ret = xzOK`
			`s.pos = offset`
			`s.x86PrevMask = 0`
			`s.temp.filtered = 0`
			`s.temp.buf = nil`
			`return xzOK`
			`}`