githaven-fork/vendor/github.com/dsnet/compress/internal/prefix/reader.go

336 lines
9.1 KiB
Go
Raw Normal View History

// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"bufio"
"bytes"
"encoding/binary"
"io"
"strings"
"github.com/dsnet/compress"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
)
// Reader implements a prefix decoder. If the input io.Reader satisfies the
// compress.ByteReader or compress.BufferedReader interface, then it also
// guarantees that it will never read more bytes than is necessary.
//
// For high performance, provide an io.Reader that satisfies the
// compress.BufferedReader interface. If the input does not satisfy either
// compress.ByteReader or compress.BufferedReader, then it will be internally
// wrapped with a bufio.Reader.
//
// Bits are always consumed from the least-significant end of bufBits. In
// big-endian mode every input byte is bit-reversed as it is loaded, so the
// same extraction code serves both bit orderings.
type Reader struct {
Offset int64 // Number of bytes read from the underlying io.Reader
// rd is the reader used by Read for direct byte reads. Init sets it to the
// caller's reader, or to the internal bufio.Reader when the input had to be
// wrapped.
rd io.Reader
byteRd compress.ByteReader // Set if rd is a ByteReader
bufRd compress.BufferedReader // Set if rd is a BufferedReader
bufBits uint64 // Buffer to hold some bits
numBits uint // Number of valid bits in bufBits
bigEndian bool // Do we treat input bytes as big endian?
// These fields are only used if rd is a compress.BufferedReader.
bufPeek []byte // Buffer for the Peek data
discardBits int // Number of bits to discard from reader
fedBits uint // Number of bits fed in last call to PullBits
// These fields are used to reduce allocations.
// Init carries them over from the previous state of the Reader, so calling
// Init again reuses the wrapper already allocated for that kind of input.
bb *buffer // Wrapper used when the input is a *bytes.Buffer
br *bytesReader // Wrapper used when the input is a *bytes.Reader
sr *stringReader // Wrapper used when the input is a *strings.Reader
bu *bufio.Reader // Wrapper used when the input is neither buffered nor a ByteReader
}
// Init resets pr so that it decodes bits from r, discarding any buffered
// bits and offsets left over from earlier use. When bigEndian is true, bits
// are taken starting at the most-significant bit of each byte (as bzip2
// does); otherwise they are taken starting at the least-significant bit
// (as deflate and brotli do).
//
// The concrete type of r selects the fill strategy:
//   - *bytes.Buffer, *bytes.Reader and *strings.Reader are wrapped in the
//     package's own adapters and used through the BufferedReader path;
//   - any other compress.BufferedReader is used directly;
//   - a compress.ByteReader is read one byte at a time;
//   - anything else is wrapped in a bufio.Reader.
//
// The adapters and the bufio.Reader are kept across calls, so re-initializing
// a Reader does not allocate a new wrapper for a kind of input seen before.
func (pr *Reader) Init(r io.Reader, bigEndian bool) {
	// Save the reusable wrappers before the struct is wiped.
	bb, br, sr, bu := pr.bb, pr.br, pr.sr, pr.bu
	*pr = Reader{rd: r, bigEndian: bigEndian, bb: bb, br: br, sr: sr, bu: bu}

	switch src := r.(type) {
	case *bytes.Buffer:
		if pr.bb == nil {
			pr.bb = new(buffer)
		}
		*pr.bb = buffer{Buffer: src}
		pr.bufRd = pr.bb

	case *bytes.Reader:
		if pr.br == nil {
			pr.br = new(bytesReader)
		}
		*pr.br = bytesReader{Reader: src}
		pr.bufRd = pr.br

	case *strings.Reader:
		if pr.sr == nil {
			pr.sr = new(stringReader)
		}
		*pr.sr = stringReader{Reader: src}
		pr.bufRd = pr.sr

	case compress.BufferedReader:
		pr.bufRd = src

	case compress.ByteReader:
		pr.byteRd = src

	default:
		// Plain io.Reader: route every read through a bufio.Reader so that
		// direct reads and peeked bits observe the same stream position.
		if pr.bu == nil {
			pr.bu = bufio.NewReader(nil)
		}
		pr.bu.Reset(r)
		pr.rd = pr.bu
		pr.bufRd = pr.bu
	}
}
// BitsRead reports the total number of bits emitted from any Read method.
//
// On the ByteReader path Offset counts every byte already pulled into the
// bit buffer, so the bits still waiting in the buffer are subtracted. On the
// BufferedReader path Offset only counts bytes that have been discarded from
// the underlying reader, so the bits consumed but not yet discarded are added.
func (pr *Reader) BitsRead() int64 {
	if pr.bufRd == nil {
		return 8*pr.Offset - int64(pr.numBits)
	}
	pending := pr.discardBits + int(pr.fedBits-pr.numBits)
	return 8*pr.Offset + int64(pending)
}
// IsBufferedReader reports whether the Reader is operating through a
// compress.BufferedReader.
//
// Given how Init assigns bufRd, this is true when the input was a
// *bytes.Buffer, *bytes.Reader, *strings.Reader or any other
// compress.BufferedReader, and also when Init fell back to wrapping the input
// in a bufio.Reader. It is false only when the input was used as a plain
// compress.ByteReader.
func (pr *Reader) IsBufferedReader() bool {
return pr.bufRd != nil
}
// ReadPads drops the 0-7 bits needed to make the number of buffered bits a
// multiple of eight and returns the value of the dropped bits, so that the
// caller can check the padding if the format requires it.
func (pr *Reader) ReadPads() uint {
	pad := pr.numBits % 8
	mask := uint64(1)<<pad - 1
	bits := uint(pr.bufBits & mask)
	pr.numBits -= pad
	pr.bufBits >>= pad
	return bits
}
// Read reads bytes into buf.
// The bit-ordering mode does not affect this method.
//
// If the bit buffer still holds data, Read requires it to be byte-aligned
// (otherwise it returns an errors.Invalid error) and hands out only those
// buffered bytes, returning without touching the underlying reader even if
// buf is not filled. Once the bit buffer is empty, Read flushes the pending
// discard so the underlying reader is positioned correctly and then reads
// from it directly, adding the byte count to Offset.
func (pr *Reader) Read(buf []byte) (cnt int, err error) {
	if pr.numBits > 0 {
		if pr.numBits%8 != 0 {
			return 0, errorf(errors.Invalid, "non-aligned bit buffer")
		}
		for cnt = 0; cnt < len(buf) && pr.numBits > 0; cnt++ {
			b := byte(pr.bufBits)
			if pr.bigEndian {
				// Bytes are stored bit-reversed in big-endian mode; undo
				// that so the caller sees the original byte.
				b = internal.ReverseLUT[b]
			}
			buf[cnt] = b
			pr.bufBits >>= 8
			pr.numBits -= 8
		}
		return cnt, nil
	}

	// numBits is zero, so nothing in bufBits is valid. The 64-bit fast path
	// of PullBits can leave a few bits of the next, not-yet-counted byte in
	// the buffer. The direct read below consumes that byte from the stream,
	// so those leftover bits must be cleared or a later PullBits would OR
	// unrelated input on top of them.
	pr.bufBits = 0

	if _, err := pr.Flush(); err != nil {
		return 0, err
	}
	cnt, err = pr.rd.Read(buf)
	pr.Offset += int64(cnt)
	return cnt, err
}
// ReadOffset decodes a symbol with pd, uses it as an index into rcs, and
// returns that range code's Base plus Len extra bits read from the stream.
//
// Read failures surface the same way as in ReadSymbol and ReadBits. The
// decoded symbol is used as an index without a bounds check here, so rcs
// must cover every symbol pd can produce.
func (pr *Reader) ReadOffset(pd *Decoder, rcs RangeCodes) uint {
	sym := pr.ReadSymbol(pd)
	rc := rcs[sym]
	extra := pr.ReadBits(uint(rc.Len))
	return uint(rc.Base) + extra
}
// TryReadBits consumes nb bits if, and only if, the bit buffer already holds
// that many; it never touches the underlying reader. It returns the value
// and true on success, or zero and false when the caller must fall back to
// ReadBits.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadBits(nb uint) (uint, bool) {
	if nb > pr.numBits {
		return 0, false
	}
	mask := uint64(1)<<nb - 1
	bits := uint(pr.bufBits & mask)
	pr.numBits -= nb
	pr.bufBits >>= nb
	return bits, true
}
// ReadBits returns the next nb bits of the stream, pulling more input from
// the underlying reader first. A failure to obtain enough bits is not
// returned; it is reported through errors.Panic.
func (pr *Reader) ReadBits(nb uint) uint {
	err := pr.PullBits(nb)
	if err != nil {
		errors.Panic(err)
	}
	mask := uint64(1)<<nb - 1
	bits := uint(pr.bufBits & mask)
	pr.numBits -= nb
	pr.bufBits >>= nb
	return bits
}
// TryReadSymbol decodes the next symbol using only the bits already in the
// bit buffer and only the decoder's primary chunk table. It returns the
// symbol and true on success.
//
// It returns zero and false, consuming nothing, when the decoder has no
// chunks, when fewer than pd.MinBits bits are buffered, or when the table
// entry's bit count exceeds either the buffered bits or pd.chunkBits. In
// those cases the caller should fall back to ReadSymbol.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadSymbol(pd *Decoder) (uint, bool) {
	if len(pd.chunks) == 0 || pr.numBits < uint(pd.MinBits) {
		return 0, false
	}
	entry := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
	width := uint(entry & countMask)
	if width > uint(pd.chunkBits) || width > pr.numBits {
		return 0, false
	}
	pr.numBits -= width
	pr.bufBits >>= width
	return uint(entry >> countBits), true
}
// ReadSymbol decodes and returns the next symbol using the prefix Decoder pd,
// pulling input from the underlying reader as needed.
//
// An empty decoder is reported through panicf with errors.Invalid, and a
// failure to pull bits is reported through errors.Panic; neither is returned
// as an error value.
func (pr *Reader) ReadSymbol(pd *Decoder) uint {
	if len(pd.chunks) == 0 {
		panicf(errors.Invalid, "decode with empty prefix tree")
	}

	// Start by asking for the shortest possible code. Each pass learns the
	// real code length from the table and asks for that many bits next time.
	need := uint(pd.MinBits)
	for {
		if err := pr.PullBits(need); err != nil {
			errors.Panic(err)
		}

		entry := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
		need = uint(entry & countMask)
		if need > uint(pd.chunkBits) {
			// The code is longer than the primary table covers: the entry
			// holds an index into links, which is indexed by the bits that
			// follow the first chunkBits bits.
			idx := entry >> countBits
			entry = pd.links[idx][uint32(pr.bufBits>>pd.chunkBits)&pd.linkMask]
			need = uint(entry & countMask)
		}

		if need > pr.numBits {
			// Not enough bits buffered for this code yet; pull more.
			continue
		}
		pr.bufBits >>= need
		pr.numBits -= need
		return uint(entry >> countBits)
	}
}
// Flush brings the read position of the underlying reader up to date and
// returns the resulting Offset.
//
// On the ByteReader path there is nothing to do, since Offset is advanced as
// each byte is read. On the BufferedReader path the bits consumed since the
// last fill are added to the pending discard count, that count is rounded up
// to whole bytes, and the bytes are dropped with Discard. Any error from
// Discard is returned together with the updated Offset.
func (pr *Reader) Flush() (int64, error) {
	if pr.bufRd == nil {
		return pr.Offset, nil
	}

	// Fold the bits consumed since the last fill into the discard total.
	pr.discardBits += int(pr.fedBits - pr.numBits)
	pr.fedBits = pr.numBits

	// Round up to whole bytes. A partly consumed byte is discarded now and
	// leaves discardBits in the range -7..0 to account for its unread bits.
	want := (pr.discardBits + 7) / 8
	got, err := pr.bufRd.Discard(want)
	pr.discardBits -= got * 8
	pr.Offset += int64(got)

	// The peeked slice is no longer valid once the reader has advanced.
	pr.bufPeek = nil
	return pr.Offset, err
}
// PullBits ensures that at least nb bits exist in the bit buffer.
// If the underlying reader is a compress.BufferedReader, then this will fill
// the bit buffer with as many bits as possible, relying on Peek and Discard to
// properly advance the read offset. Otherwise, it will use ReadByte to fill the
// buffer with just the right number of bits.
//
// It returns nil on success. If the input ends before nb bits are available,
// io.EOF from the underlying reader is reported as io.ErrUnexpectedEOF; other
// errors are returned unchanged.
//
// On the BufferedReader path the fill loop runs on every call, even when the
// buffer already holds nb bits.
func (pr *Reader) PullBits(nb uint) error {
if pr.bufRd != nil {
// Record the bits consumed since the previous fill so that they are
// eventually discarded from the underlying reader.
pr.discardBits += int(pr.fedBits - pr.numBits)
for {
if len(pr.bufPeek) == 0 {
// The peeked data is used up: advance the underlying reader past
// what has been consumed and peek again. Setting fedBits first
// makes the Flush below add nothing further to discardBits.
pr.fedBits = pr.numBits // Don't discard bits just added
if _, err := pr.Flush(); err != nil {
return err
}
// Peek no more bytes than necessary.
// The computation for cntPeek computes the minimum number of
// bytes to Peek to fill nb bits, i.e. nb rounded up to a whole
// number of bytes. If the reader already has more than that
// buffered, all of the buffered data is peeked instead.
var err error
cntPeek := int(nb+(-nb&7)) / 8
if cntPeek < pr.bufRd.Buffered() {
cntPeek = pr.bufRd.Buffered()
}
pr.bufPeek, err = pr.bufRd.Peek(cntPeek)
pr.bufPeek = pr.bufPeek[int(pr.numBits/8):] // Skip buffered bits
if len(pr.bufPeek) == 0 {
// Nothing new could be peeked. That is fine if the bit buffer
// already satisfies the request; otherwise report why.
if pr.numBits >= nb {
break
}
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
}
n := int(64-pr.numBits) / 8 // Number of bytes to copy to bit buffer
if len(pr.bufPeek) >= 8 {
// Fast path: load eight bytes at once.
// Starting with Go 1.7, the compiler should use a wide integer
// load here if the architecture supports it.
u := binary.LittleEndian.Uint64(pr.bufPeek)
if pr.bigEndian {
// Swap all the bits within each byte.
u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
}
// Only n whole bytes are counted as loaded, but the shift keeps
// 64-numBits bits of u. When numBits is not a multiple of eight,
// up to 7 low bits of the following byte therefore land above
// numBits. They match what a later load of that same byte will OR
// into the same position.
pr.bufBits |= u << pr.numBits
pr.numBits += uint(n * 8)
pr.bufPeek = pr.bufPeek[n:]
break
} else {
// Slow path: fewer than eight bytes are peeked, so copy whole bytes
// one at a time, limited to what fits and what is available.
if n > len(pr.bufPeek) {
n = len(pr.bufPeek)
}
for _, c := range pr.bufPeek[:n] {
if pr.bigEndian {
c = internal.ReverseLUT[c]
}
pr.bufBits |= uint64(c) << pr.numBits
pr.numBits += 8
}
pr.bufPeek = pr.bufPeek[n:]
// Stop once no further whole byte fits; otherwise loop to use the
// remaining peeked bytes or to peek more.
if pr.numBits > 56 {
break
}
}
}
// Remember the fill level so that later calls can tell how many bits
// were consumed in between.
pr.fedBits = pr.numBits
} else {
// ByteReader path: read exactly as many bytes as needed, counting each
// one in Offset as it is read.
for pr.numBits < nb {
c, err := pr.byteRd.ReadByte()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
if pr.bigEndian {
c = internal.ReverseLUT[c]
}
pr.bufBits |= uint64(c) << pr.numBits
pr.numBits += 8
pr.Offset++
}
}
return nil
}