
Vendor Update Go Libs (#13444)

* denisenkom/go-mssqldb untagged -> v0.9.0

* github.com/editorconfig/editorconfig-core-go v2.3.7 -> v2.3.8

* github.com/go-testfixtures/testfixtures v3.4.0 -> v3.4.1

* github.com/mholt/archiver v3.3.2 -> v3.5.0

* github.com/olivere/elastic v7.0.20 -> v7.0.21

* github.com/urfave/cli v1.22.4 -> v1.22.5

* github.com/xanzy/go-gitlab v0.38.1 -> v0.39.0

* github.com/yuin/goldmark-meta untagged -> v1.0.0

* github.com/ethantkoenig/rupture 0a76f03a811a -> c3b3b810dc77

* github.com/jaytaylor/html2text 8fb95d837f7d -> 3577fbdbcff7

* github.com/kballard/go-shellquote cd60e84ee657 -> 95032a82bc51

* github.com/msteinert/pam 02ccfbfaf0cc -> 913b8f8cdf8b

* github.com/unknwon/paginater 7748a72e0141 -> 042474bd0eae

* CI.restart()

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Authored by 6543 on 2020-11-06 19:41:42 +01:00, committed by GitHub
parent eebaa81f43
commit 30ce3731a1
184 changed files with 12387 additions and 2975 deletions


@@ -0,0 +1,469 @@
package lz4block
import (
"encoding/binary"
"math/bits"
"sync"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
const (
// The following constants are used to setup the compression algorithm.
minMatch = 4 // the minimum size of a match sequence (4 bytes)
winSizeLog = 16 // LZ4 64Kb window size limit
winSize = 1 << winSizeLog
winMask = winSize - 1 // 64Kb window of previous data for dependent blocks
// hashLog determines the size of the hash table used to quickly find a previous match position.
// Its value influences the compression speed and memory usage, the lower the faster,
// but at the expense of the compression ratio.
// 16 seems to be the best compromise for fast compression.
hashLog = 16
htSize = 1 << hashLog
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
func recoverBlock(e *error) {
if r := recover(); r != nil && *e == nil {
*e = lz4errors.ErrInvalidSourceShortBuffer
}
}
// blockHash hashes the lower 6 bytes into a value < htSize.
func blockHash(x uint64) uint32 {
const prime6bytes = 227718039650203
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}
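// CompressBlockBound returns the maximum size an LZ4-compressed block of n
// bytes can take, e.g. n=65536 gives 65536 + 65536/255 + 16 = 65809.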
func CompressBlockBound(n int) int {
return n + n/255 + 16
}
func UncompressBlock(src, dst []byte) (int, error) {
if len(src) == 0 {
return 0, nil
}
if di := decodeBlock(dst, src); di >= 0 {
return di, nil
}
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
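// Taken together, CompressBlockBound, CompressBlock (below) and UncompressBlock
// form the block-level round trip. A minimal sketch, written as if inside this
// package (the exported lz4 v4 package offers equivalent wrappers);
// roundTripExample is an illustrative name, not part of the library:
func roundTripExample(src []byte) ([]byte, error) {
	// With a destination sized to the worst-case bound, CompressBlock never
	// signals "incompressible" by returning 0 (see the comment in CompressBlock).
	comp := make([]byte, CompressBlockBound(len(src)))
	n, err := CompressBlock(src, comp)
	if err != nil {
		return nil, err
	}
	// Decompression needs a destination at least as large as the original
	// data, so the uncompressed size must be tracked by the caller.
	out := make([]byte, len(src))
	m, err := UncompressBlock(comp[:n], out)
	if err != nil {
		return nil, err
	}
	return out[:m], nil
}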
type Compressor struct {
// Offsets are at most 64kiB, so we can store only the lower 16 bits of
// match positions: effectively, an offset from some 64kiB block boundary.
//
// When we retrieve such an offset, we interpret it as relative to the last
// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
// depending on which of these is inside the current window. If a table
// entry was generated more than 64kiB back in the input, we find out by
// inspecting the input stream.
table [htSize]uint16
needsReset bool
}
// Get returns the position of a presumptive match for the hash h.
// The match may be a false positive due to a hash collision or an old entry.
// If si < winSize, the return value may be negative.
func (c *Compressor) get(h uint32, si int) int {
h &= htSize - 1
i := int(c.table[h])
i += si &^ winMask
if i >= si {
// Try previous 64kiB block (negative when in first block).
i -= winSize
}
return i
}
func (c *Compressor) put(h uint32, si int) {
h &= htSize - 1
c.table[h] = uint16(si)
}
var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}
func CompressBlock(src, dst []byte) (int, error) {
c := compressorPool.Get().(*Compressor)
n, err := c.CompressBlock(src, dst)
compressorPool.Put(c)
return n, err
}
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
if c.needsReset {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.table = [htSize]uint16{}
}
c.needsReset = true // Only false on first call.
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
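// For example, 640 bytes since the last match means skipping 1 + (640>>7) = 6 bytes ahead.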
// si: Current position of the search.
// anchor: Position of the current literals.
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
goto lastLiterals
}
// Fast scan strategy: the hash table only stores the last 4-byte sequences.
for si < sn {
// Hash the next 6 bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)
// We check a match at s, s+1 and s+2 and pick the first one we get.
// Checking all 3 only requires us to load the source once.
ref := c.get(h, si)
ref2 := c.get(h2, si)
c.put(h, si)
c.put(h2, si+1)
offset := si - ref
if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref3 := c.get(h, si+2)
// Check the second match at si+1
si += 1
offset = si - ref2
if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref3
c.put(h, si)
if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}
// Match found.
lLen := si - anchor // Literal length.
// We already matched 4 bytes.
mLen := 4
// Extend backwards if we can, reducing literals.
tOff := si - offset - 1
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
si--
tOff--
lLen--
mLen++
}
// Add the match length, so we continue search at the end.
// Use mLen to store the offset base.
si, mLen = si+mLen, si+minMatch
// Find the longest match by looking by batches of 8 bytes.
for si+8 < sn {
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
if x == 0 {
si += 8
} else {
// Stop at the first non-zero byte.
si += bits.TrailingZeros64(x) >> 3
break
}
}
mLen = si - mLen
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
if di+lLen > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen + 2
anchor = si
// Encode offset.
if di > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
c.put(h, si-2)
}
lastLiterals:
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
if di+len(src)-anchor > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}
// blockHashHC hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
return x * hasher >> (32 - winSizeLog)
}
type CompressorHC struct {
// hashTable: stores the last position found for a given hash
// chainTable: stores previous positions for a given hash
hashTable, chainTable [htSize]int
needsReset bool
}
var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
c := compressorHCPool.Get().(*CompressorHC)
n, err := c.CompressBlock(src, dst, depth)
compressorHCPool.Put(c)
return n, err
}
func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
if c.needsReset {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.hashTable = [htSize]int{}
c.chainTable = [htSize]int{}
}
c.needsReset = true // Only false on first call.
defer recoverBlock(&err)
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
goto lastLiterals
}
if depth == 0 {
depth = winSize
}
for si < sn {
// Hash the next 4 bytes (sequence).
match := binary.LittleEndian.Uint32(src[si:])
h := blockHashHC(match)
// Follow the chain until out of window and give the longest match.
mLen := 0
offset := 0
for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
// must match to improve on the match length.
if src[next+mLen] != src[si+mLen] {
continue
}
ml := 0
// Compare the current position with a previous with the same hash.
for ml < sn-si {
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
if x == 0 {
ml += 8
} else {
// Stop at the first non-zero byte.
ml += bits.TrailingZeros64(x) >> 3
break
}
}
if ml < minMatch || ml <= mLen {
// Match too small (<minMatch) or smaller than the current match.
continue
}
// Found a longer match, keep its position and length.
mLen = ml
offset = si - next
// Try another previous position with the same hash.
}
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
// No match found.
if mLen == 0 {
si += 1 + (si-anchor)>>adaptSkipLog
continue
}
// Match found.
// Update hash/chain tables with overlapping bytes:
// si already hashed, add everything from si+1 up to the match length.
winStart := si + 1
if ws := si + mLen - winSize; ws > winStart {
winStart = ws
}
for si, ml := winStart, si+mLen; si < ml; {
match >>= 8
match |= uint32(src[si+3]) << 24
h := blockHashHC(match)
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
si++
}
lLen := si - anchor
si += mLen
mLen -= minMatch // Match length does not include minMatch.
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen
anchor = si
// Encode offset.
di += 2
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(mLen)
di++
}
}
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
lastLiterals:
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
lLen -= 0xF
for ; lLen >= 0xFF; lLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}


@@ -0,0 +1,88 @@
// Package lz4block provides LZ4 BlockSize types and pools of buffers.
package lz4block
import "sync"
const (
Block64Kb uint32 = 1 << (16 + iota*2)
Block256Kb
Block1Mb
Block4Mb
Block8Mb = 2 * Block4Mb
legacyBlockSize = Block8Mb + Block8Mb/255 + 16 // CompressBound(Block8Mb)
)
var (
BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, legacyBlockSize) }}
)
func Index(b uint32) BlockSizeIndex {
switch b {
case Block64Kb:
return 4
case Block256Kb:
return 5
case Block1Mb:
return 6
case Block4Mb:
return 7
case Block8Mb: // only valid in legacy mode
return 3
}
return 0
}
func IsValid(b uint32) bool {
return Index(b) > 0
}
type BlockSizeIndex uint8
func (b BlockSizeIndex) IsValid() bool {
switch b {
case 4, 5, 6, 7:
return true
}
return false
}
func (b BlockSizeIndex) Get() []byte {
var buf interface{}
switch b {
case 4:
buf = BlockPool64K.Get()
case 5:
buf = BlockPool256K.Get()
case 6:
buf = BlockPool1M.Get()
case 7:
buf = BlockPool4M.Get()
case 3:
buf = BlockPool8M.Get()
}
return buf.([]byte)
}
func Put(buf []byte) {
// Safeguard: do not allow invalid buffers.
switch c := cap(buf); uint32(c) {
case Block64Kb:
BlockPool64K.Put(buf[:c])
case Block256Kb:
BlockPool256K.Put(buf[:c])
case Block1Mb:
BlockPool1M.Put(buf[:c])
case Block4Mb:
BlockPool4M.Put(buf[:c])
case legacyBlockSize:
BlockPool8M.Put(buf[:c])
}
}
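// A sketch of how these pools are meant to be used together: map a block size
// to its pool index once, then borrow and return buffers through it. The
// helper name and callback are illustrative only, not part of this package:
func withBlockBuffer(blockSize uint32, fn func(buf []byte)) {
	idx := Index(blockSize) // 0 means blockSize is not a supported block size
	if idx == 0 {
		return
	}
	buf := idx.Get() // borrow a buffer of at least blockSize bytes from its pool
	defer Put(buf)   // Put matches the buffer to its pool by capacity
	fn(buf)
}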
type CompressionLevel uint32
const Fast CompressionLevel = 0


@@ -0,0 +1,369 @@
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// AX scratch
// BX scratch
// CX scratch
// DX token
//
// DI &dst
// SI &src
// R8 &dst + len(dst)
// R9 &src + len(src)
// R11 &dst
// R12 short output end
// R13 short input end
// func decodeBlock(dst, src []byte) int
// using 50 bytes of stack currently
TEXT ·decodeBlock(SB), NOSPLIT, $64-56
MOVQ dst_base+0(FP), DI
MOVQ DI, R11
MOVQ dst_len+8(FP), R8
ADDQ DI, R8
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R9
CMPQ R9, $0
JE err_corrupt
ADDQ SI, R9
// shortcut ends
// short output end
MOVQ R8, R12
SUBQ $32, R12
// short input end
MOVQ R9, R13
SUBQ $16, R13
loop:
// for si < len(src)
CMPQ SI, R9
JGE end
// token := uint32(src[si])
MOVBQZX (SI), DX
INCQ SI
// lit_len = token >> 4
// if lit_len > 0
// CX = lit_len
MOVQ DX, CX
SHRQ $4, CX
// if lit_len != 0xF
CMPQ CX, $0xF
JEQ lit_len_loop_pre
CMPQ DI, R12
JGE lit_len_loop_pre
CMPQ SI, R13
JGE lit_len_loop_pre
// copy shortcut
// A two-stage shortcut for the most common case:
// 1) If the literal length is 0..14, and there is enough space,
// enter the shortcut and copy 16 bytes on behalf of the literals
// (in the fast mode, only 8 bytes can be safely copied this way).
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
// manner; but we ensure that there's enough space in the output for
// those 18 bytes earlier, upon entering the shortcut (in other words,
// there is a combined check for both stages).
// copy literal
MOVOU (SI), X0
MOVOU X0, (DI)
ADDQ CX, DI
ADDQ CX, SI
MOVQ DX, CX
ANDQ $0xF, CX
// The second stage: prepare for match copying, decode full info.
// If it doesn't work out, the info won't be wasted.
// offset := uint16(data[:2])
MOVWQZX (SI), DX
ADDQ $2, SI
MOVQ DI, AX
SUBQ DX, AX
CMPQ AX, DI
JGT err_short_buf
// if we can't do the second stage then jump straight to read the
// match length, we already have the offset.
CMPQ CX, $0xF
JEQ match_len_loop_pre
CMPQ DX, $8
JLT match_len_loop_pre
CMPQ AX, R11
JLT err_short_buf
// memcpy(op + 0, match + 0, 8);
MOVQ (AX), BX
MOVQ BX, (DI)
// memcpy(op + 8, match + 8, 8);
MOVQ 8(AX), BX
MOVQ BX, 8(DI)
// memcpy(op +16, match +16, 2);
MOVW 16(AX), BX
MOVW BX, 16(DI)
LEAQ 4(DI)(CX*1), DI // minmatch
// shortcut complete, load next token
JMP loop
lit_len_loop_pre:
// if lit_len > 0
CMPQ CX, $0
JEQ offset
CMPQ CX, $0xF
JNE copy_literal
lit_len_loop:
// for src[si] == 0xFF
CMPB (SI), $0xFF
JNE lit_len_finalise
// bounds check src[si+1]
LEAQ 1(SI), AX
CMPQ AX, R9
JGT err_short_buf
// lit_len += 0xFF
ADDQ $0xFF, CX
INCQ SI
JMP lit_len_loop
lit_len_finalise:
// lit_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_literal:
// bounds check src and dst
LEAQ (SI)(CX*1), AX
CMPQ AX, R9
JGT err_short_buf
LEAQ (DI)(CX*1), AX
CMPQ AX, R8
JGT err_short_buf
// what's a good cutoff to call memmove?
CMPQ CX, $16
JGT memmove_lit
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_lit
// if len(src[si:]) < 16
MOVQ R9, AX
SUBQ SI, AX
CMPQ AX, $16
JLT memmove_lit
MOVOU (SI), X0
MOVOU X0, (DI)
JMP finish_lit_copy
memmove_lit:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
MOVB DX, 48(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVB 48(SP), DX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
finish_lit_copy:
ADDQ CX, SI
ADDQ CX, DI
CMPQ SI, R9
JGE end
offset:
// CX := mLen
// free up DX to use for offset
MOVQ DX, CX
LEAQ 2(SI), AX
CMPQ AX, R9
JGT err_short_buf
// offset
// DX := int(src[si]) | int(src[si+1])<<8
MOVWQZX (SI), DX
ADDQ $2, SI
// 0 offset is invalid
CMPQ DX, $0
JEQ err_corrupt
ANDB $0xF, CX
match_len_loop_pre:
// if mlen != 0xF
CMPB CX, $0xF
JNE copy_match
match_len_loop:
// for src[si] == 0xFF
// lit_len += 0xFF
CMPB (SI), $0xFF
JNE match_len_finalise
// bounds check src[si+1]
LEAQ 1(SI), AX
CMPQ AX, R9
JGT err_short_buf
ADDQ $0xFF, CX
INCQ SI
JMP match_len_loop
match_len_finalise:
// lit_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_match:
// mLen += minMatch
ADDQ $4, CX
// check we have match_len bytes left in dst
// di+match_len < len(dst)
LEAQ (DI)(CX*1), AX
CMPQ AX, R8
JGT err_short_buf
// DX = offset
// CX = match_len
// BX = &dst + (di - offset)
MOVQ DI, BX
SUBQ DX, BX
// check BX is within dst
// if BX < &dst
CMPQ BX, R11
JLT err_short_buf
// if offset + match_len < di
LEAQ (BX)(CX*1), AX
CMPQ DI, AX
JGT copy_interior_match
// AX := len(dst[:di])
// MOVQ DI, AX
// SUBQ R11, AX
// copy 16 bytes at a time
// if di-offset < 16 copy 16-(di-offset) bytes to di
// then do the remaining
copy_match_loop:
// for match_len >= 0
// dst[di] = dst[i]
// di++
// i++
MOVB (BX), AX
MOVB AX, (DI)
INCQ DI
INCQ BX
DECQ CX
CMPQ CX, $0
JGT copy_match_loop
JMP loop
copy_interior_match:
CMPQ CX, $16
JGT memmove_match
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_match
MOVOU (BX), X0
MOVOU X0, (DI)
ADDQ CX, DI
JMP loop
memmove_match:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
ADDQ CX, DI
JMP loop
err_corrupt:
MOVQ $-1, ret+48(FP)
RET
err_short_buf:
MOVQ $-2, ret+48(FP)
RET
end:
SUBQ R11, DI
MOVQ DI, ret+48(FP)
RET


@@ -0,0 +1,201 @@
// +build gc
// +build !noasm
#include "textflag.h"
// Register allocation.
#define dst R0
#define dstorig R1
#define src R2
#define dstend R3
#define srcend R4
#define match R5 // Match address.
#define token R6
#define len R7 // Literal and match lengths.
#define offset R6 // Match offset; overlaps with token.
#define tmp1 R8
#define tmp2 R9
#define tmp3 R12
#define minMatch $4
// func decodeBlock(dst, src []byte) int
TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28
MOVW dst_base +0(FP), dst
MOVW dst_len +4(FP), dstend
MOVW src_base+12(FP), src
MOVW src_len +16(FP), srcend
CMP $0, srcend
BEQ shortSrc
ADD dst, dstend
ADD src, srcend
MOVW dst, dstorig
loop:
// Read token. Extract literal length.
MOVBU.P 1(src), token
MOVW token >> 4, len
CMP $15, len
BNE readLitlenDone
readLitlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD tmp1, len
CMP $255, tmp1
BEQ readLitlenLoop
readLitlenDone:
CMP $0, len
BEQ copyLiteralDone
// Bounds check dst+len and src+len.
ADD dst, len, tmp1
CMP dstend, tmp1
//BHI shortDst // Uncomment for distinct error codes.
ADD src, len, tmp2
CMP.LS srcend, tmp2
BHI shortSrc
// Copy literal.
CMP $4, len
BLO copyLiteralFinish
// Copy 0-3 bytes until src is aligned.
TST $1, src
MOVBU.NE.P 1(src), tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $1, len
TST $2, src
MOVHU.NE.P 2(src), tmp2
MOVB.NE.P tmp2, 1(dst)
MOVW.NE tmp2 >> 8, tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $2, len
B copyLiteralLoopCond
copyLiteralLoop:
// Aligned load, unaligned write.
MOVW.P 4(src), tmp1
MOVW tmp1 >> 8, tmp2
MOVB tmp2, 1(dst)
MOVW tmp1 >> 16, tmp3
MOVB tmp3, 2(dst)
MOVW tmp1 >> 24, tmp2
MOVB tmp2, 3(dst)
MOVB.P tmp1, 4(dst)
copyLiteralLoopCond:
// Loop until len-4 < 0.
SUB.S $4, len
BPL copyLiteralLoop
// Restore len, which is now negative.
ADD $4, len
copyLiteralFinish:
// Copy remaining 0-3 bytes.
TST $2, len
MOVHU.NE.P 2(src), tmp2
MOVB.NE.P tmp2, 1(dst)
MOVW.NE tmp2 >> 8, tmp1
MOVB.NE.P tmp1, 1(dst)
TST $1, len
MOVBU.NE.P 1(src), tmp1
MOVB.NE.P tmp1, 1(dst)
copyLiteralDone:
CMP src, srcend
BEQ end
// Initial part of match length.
// This frees up the token register for reuse as offset.
AND $15, token, len
// Read offset.
ADD $2, src
CMP srcend, src
BHI shortSrc
MOVBU -2(src), offset
MOVBU -1(src), tmp1
ORR tmp1 << 8, offset
CMP $0, offset
BEQ corrupt
// Read rest of match length.
CMP $15, len
BNE readMatchlenDone
readMatchlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD tmp1, len
CMP $255, tmp1
BEQ readMatchlenLoop
readMatchlenDone:
ADD minMatch, len
// Bounds check dst+len and match = dst-offset.
ADD dst, len, tmp1
CMP dstend, tmp1
//BHI shortDst // Uncomment for distinct error codes.
SUB offset, dst, match
CMP.LS match, dstorig
BHI corrupt
// If the offset is at least four (len is, because of minMatch),
// do a four-way unrolled byte copy loop. Using MOVD instead of four
// byte loads is much faster, but to remain portable we'd have to
// align match first, which in turn is too expensive.
CMP $4, offset
BLO copyMatch
SUB $4, len
copyMatch4:
MOVBU.P 4(match), tmp1
MOVB.P tmp1, 4(dst)
MOVBU -3(match), tmp2
MOVB tmp2, -3(dst)
MOVBU -2(match), tmp3
MOVB tmp3, -2(dst)
MOVBU -1(match), tmp1
MOVB tmp1, -1(dst)
SUB.S $4, len
BPL copyMatch4
// Restore len, which is now negative.
ADD.S $4, len
BEQ copyMatchDone
copyMatch:
// Simple byte-at-a-time copy.
SUB.S $1, len
MOVBU.P 1(match), tmp2
MOVB.P tmp2, 1(dst)
BNE copyMatch
copyMatchDone:
CMP src, srcend
BNE loop
end:
SUB dstorig, dst, tmp1
MOVW tmp1, ret+24(FP)
RET
// The three error cases have distinct labels so we can put different
// return codes here when debugging, or if the error returns need to
// be changed.
shortDst:
shortSrc:
corrupt:
MOVW $-1, tmp1
MOVW tmp1, ret+24(FP)
RET


@@ -0,0 +1,9 @@
// +build amd64 arm
// +build !appengine
// +build gc
// +build !noasm
package lz4block
//go:noescape
func decodeBlock(dst, src []byte) int


@@ -0,0 +1,100 @@
// +build !amd64,!arm appengine !gc noasm
package lz4block
func decodeBlock(dst, src []byte) (ret int) {
const hasError = -2
defer func() {
if recover() != nil {
ret = hasError
}
}()
var si, di uint
for {
// Literals and match lengths (token).
b := uint(src[si])
si++
// Literals.
if lLen := b >> 4; lLen > 0 {
switch {
case lLen < 0xF && si+16 < uint(len(src)):
// Shortcut 1
// if we have enough room in src and dst, and the literals length
// is small enough (0..14) then copy all 16 bytes, even if not all
// are part of the literals.
copy(dst[di:], src[si:si+16])
si += lLen
di += lLen
if mLen := b & 0xF; mLen < 0xF {
// Shortcut 2
// if the match length (4..18) fits within the literals, then copy
// all 18 bytes, even if not all are part of the literals.
mLen += 4
if offset := uint(src[si]) | uint(src[si+1])<<8; mLen <= offset {
i := di - offset
end := i + 18
if end > uint(len(dst)) {
// The remaining buffer may not hold 18 bytes.
// See https://github.com/pierrec/lz4/issues/51.
end = uint(len(dst))
}
copy(dst[di:], dst[i:end])
si += 2
di += mLen
continue
}
}
case lLen == 0xF:
for src[si] == 0xFF {
lLen += 0xFF
si++
}
lLen += uint(src[si])
si++
fallthrough
default:
copy(dst[di:di+lLen], src[si:si+lLen])
si += lLen
di += lLen
}
}
if si == uint(len(src)) {
return int(di)
} else if si > uint(len(src)) {
return hasError
}
offset := uint(src[si]) | uint(src[si+1])<<8
if offset == 0 {
return hasError
}
si += 2
// Match.
mLen := b & 0xF
if mLen == 0xF {
for src[si] == 0xFF {
mLen += 0xFF
si++
}
mLen += uint(src[si])
si++
}
mLen += minMatch
// Copy the match.
expanded := dst[di-offset:]
if mLen > offset {
// Efficiently copy the match dst[di-offset:di] into the dst slice.
bytesToCopy := offset * (mLen / offset)
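// e.g. offset=2, mLen=7: copy 6 bytes here by repeatedly doubling the 2-byte period; the remaining byte is copied below.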
for n := offset; n <= bytesToCopy+offset; n *= 2 {
copy(expanded[n:], expanded[:n])
}
di += bytesToCopy
mLen -= bytesToCopy
}
di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
}
}


@@ -0,0 +1,19 @@
package lz4errors
type Error string
func (e Error) Error() string { return string(e) }
const (
ErrInvalidSourceShortBuffer Error = "lz4: invalid source or destination buffer too short"
ErrInvalidFrame Error = "lz4: bad magic number"
ErrInternalUnhandledState Error = "lz4: unhandled state"
ErrInvalidHeaderChecksum Error = "lz4: invalid header checksum"
ErrInvalidBlockChecksum Error = "lz4: invalid block checksum"
ErrInvalidFrameChecksum Error = "lz4: invalid frame checksum"
ErrOptionInvalidCompressionLevel Error = "lz4: invalid compression level"
ErrOptionClosedOrError Error = "lz4: cannot apply options on closed or in error object"
ErrOptionInvalidBlockSize Error = "lz4: invalid block size"
ErrOptionNotApplicable Error = "lz4: option not applicable"
ErrWriterNotClosed Error = "lz4: writer not closed"
)
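// Since the stream code wraps these constants with %w (see the block checksum
// handling in lz4stream further down), callers can match them with errors.Is.
// A sketch, assuming the error value has been surfaced to the caller:
//
//	if errors.Is(err, ErrInvalidBlockChecksum) {
//		// the block data was read fully but its checksum did not match
//	}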


@@ -0,0 +1,331 @@
package lz4stream
import (
"encoding/binary"
"fmt"
"io"
"sync"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/xxh32"
)
type Blocks struct {
Block *FrameDataBlock
Blocks chan chan *FrameDataBlock
mu sync.Mutex
err error
}
func (b *Blocks) initW(f *Frame, dst io.Writer, num int) {
if num == 1 {
b.Blocks = nil
b.Block = NewFrameDataBlock(f)
return
}
b.Block = nil
if cap(b.Blocks) != num {
b.Blocks = make(chan chan *FrameDataBlock, num)
}
// goroutine managing concurrent block compression goroutines.
go func() {
// Process next block compression item.
for c := range b.Blocks {
// Read the next compressed block result.
// Waiting here ensures that the blocks are output in the order they were sent.
// The incoming channel is always closed as it indicates to the caller that
// the block has been processed.
block := <-c
if block == nil {
// Notify the block compression routine that we are done with its result.
// This is used when a sentinel block is sent to terminate the compression.
close(c)
return
}
// Do not attempt to write the block upon any previous failure.
if b.err == nil {
// Write the block.
if err := block.Write(f, dst); err != nil {
// Keep the first error.
b.err = err
// All pending compression goroutines need to shut down, so we need to keep going.
}
}
close(c)
}
}()
}
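// The pattern above -- one result channel per submitted block, drained by a
// single goroutine in submission order -- keeps compression concurrent while
// preserving output order. A standalone sketch of the same idea, with all
// names invented for illustration:
func orderedPipeline(jobs [][]byte, process func([]byte) []byte, emit func([]byte)) {
	results := make(chan chan []byte, len(jobs))
	done := make(chan struct{})
	go func() {
		defer close(done)
		// Drain result channels in the order the jobs were submitted.
		for c := range results {
			emit(<-c)
		}
	}()
	for _, job := range jobs {
		c := make(chan []byte, 1)
		results <- c // enqueue the result slot before starting the worker
		go func(job []byte, c chan []byte) {
			c <- process(job) // workers may finish out of order
		}(job, c)
	}
	close(results)
	<-done
}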
func (b *Blocks) close(f *Frame, num int) error {
if num == 1 {
if b.Block != nil {
b.Block.Close(f)
}
err := b.err
b.err = nil
return err
}
if b.Blocks == nil {
// Not initialized yet.
return nil
}
c := make(chan *FrameDataBlock)
b.Blocks <- c
c <- nil
<-c
err := b.err
b.err = nil
return err
}
// ErrorR returns any error set while uncompressing a stream.
func (b *Blocks) ErrorR() error {
b.mu.Lock()
defer b.mu.Unlock()
return b.err
}
// initR returns a channel that streams the uncompressed blocks if in concurrent
// mode and no error. When the channel is closed, check for any error with b.ErrorR.
//
// If not in concurrent mode, the uncompressed block is b.Block and the returned error
// needs to be checked.
func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) {
size := f.Descriptor.Flags.BlockSizeIndex()
if num == 1 {
b.Blocks = nil
b.Block = NewFrameDataBlock(f)
return nil, nil
}
b.Block = nil
blocks := make(chan chan []byte, num)
// data receives the uncompressed blocks.
data := make(chan []byte)
// Read blocks from the source sequentially
// and uncompress them concurrently.
// In legacy mode, accrue the uncompressed sizes in cum.
var cum uint32
go func() {
var cumx uint32
var err error
for b.ErrorR() == nil {
block := NewFrameDataBlock(f)
cumx, err = block.Read(f, src, 0)
if err != nil {
break
}
// Recheck for an error as reading may be slow and uncompressing is expensive.
if b.ErrorR() != nil {
break
}
c := make(chan []byte)
blocks <- c
go func() {
data, err := block.Uncompress(f, size.Get(), false)
if err != nil {
b.closeR(err)
} else {
c <- data
}
}()
}
// End the collection loop and the data channel.
c := make(chan []byte)
blocks <- c
c <- nil // signal the collection loop that we are done
<-c // wait for the collect loop to complete
if f.isLegacy() && cum == cumx {
err = io.EOF
}
b.closeR(err)
close(data)
}()
// Collect the uncompressed blocks and make them available
// on the returned channel.
go func(leg bool) {
defer close(blocks)
for c := range blocks {
buf := <-c
if buf == nil {
// Signal to end the loop.
close(c)
return
}
// Perform checksum now as the blocks are received in order.
if f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(buf)
}
if leg {
cum += uint32(len(buf))
}
data <- buf
close(c)
}
}(f.isLegacy())
return data, nil
}
// closeR safely sets the error on b if not already set.
func (b *Blocks) closeR(err error) {
b.mu.Lock()
if b.err == nil {
b.err = err
}
b.mu.Unlock()
}
func NewFrameDataBlock(f *Frame) *FrameDataBlock {
buf := f.Descriptor.Flags.BlockSizeIndex().Get()
return &FrameDataBlock{Data: buf, data: buf}
}
type FrameDataBlock struct {
Size DataBlockSize
Data []byte // compressed or uncompressed data (.data or .src)
Checksum uint32
data []byte // buffer for compressed data
src []byte // uncompressed data
err error // used in concurrent mode
}
func (b *FrameDataBlock) Close(f *Frame) {
b.Size = 0
b.Checksum = 0
b.err = nil
if b.data != nil {
// Block was not already closed.
lz4block.Put(b.data)
b.Data = nil
b.data = nil
b.src = nil
}
}
// Block compression errors are ignored since the buffer is sized appropriately.
func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock {
data := b.data
if f.isLegacy() {
data = data[:cap(data)]
} else {
data = data[:len(src)] // trigger the incompressible flag in CompressBlock
}
var n int
switch level {
case lz4block.Fast:
n, _ = lz4block.CompressBlock(src, data)
default:
n, _ = lz4block.CompressBlockHC(src, data, level)
}
if n == 0 {
b.Size.UncompressedSet(true)
b.Data = src
} else {
b.Size.UncompressedSet(false)
b.Data = data[:n]
}
b.Size.sizeSet(len(b.Data))
b.src = src // keep track of the source for content checksum
if f.Descriptor.Flags.BlockChecksum() {
b.Checksum = xxh32.ChecksumZero(src)
}
return b
}
func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error {
// Write is called in the same order as blocks are compressed,
// so content checksum must be done here.
if f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(b.src)
}
buf := f.buf[:]
binary.LittleEndian.PutUint32(buf, uint32(b.Size))
if _, err := dst.Write(buf[:4]); err != nil {
return err
}
if _, err := dst.Write(b.Data); err != nil {
return err
}
if b.Checksum == 0 {
return nil
}
binary.LittleEndian.PutUint32(buf, b.Checksum)
_, err := dst.Write(buf[:4])
return err
}
// Read updates b with the next block data, size and checksum if available.
func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, error) {
x, err := f.readUint32(src)
if err != nil {
return 0, err
}
if f.isLegacy() {
switch x {
case frameMagicLegacy:
// Concatenated legacy frame.
return b.Read(f, src, cum)
case cum:
// Only works in non concurrent mode, for concurrent mode
// it is handled separately.
// Linux kernel format appends the total uncompressed size at the end.
return 0, io.EOF
}
} else if x == 0 {
// Marker for end of stream.
return 0, io.EOF
}
b.Size = DataBlockSize(x)
size := b.Size.size()
if size > cap(b.data) {
return x, lz4errors.ErrOptionInvalidBlockSize
}
b.data = b.data[:size]
if _, err := io.ReadFull(src, b.data); err != nil {
return x, err
}
if f.Descriptor.Flags.BlockChecksum() {
sum, err := f.readUint32(src)
if err != nil {
return 0, err
}
b.Checksum = sum
}
return x, nil
}
func (b *FrameDataBlock) Uncompress(f *Frame, dst []byte, sum bool) ([]byte, error) {
if b.Size.Uncompressed() {
n := copy(dst, b.data)
dst = dst[:n]
} else {
n, err := lz4block.UncompressBlock(b.data, dst)
if err != nil {
return nil, err
}
dst = dst[:n]
}
if f.Descriptor.Flags.BlockChecksum() {
if c := xxh32.ChecksumZero(dst); c != b.Checksum {
err := fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, c, b.Checksum)
return nil, err
}
}
if sum && f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(dst)
}
return dst, nil
}
func (f *Frame) readUint32(r io.Reader) (x uint32, err error) {
if _, err = io.ReadFull(r, f.buf[:4]); err != nil {
return
}
x = binary.LittleEndian.Uint32(f.buf[:4])
return
}


@@ -0,0 +1,200 @@
// Package lz4stream provides the types that support reading and writing LZ4 data streams.
package lz4stream
import (
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/xxh32"
)
//go:generate go run gen.go
const (
frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagicLegacy uint32 = 0x184C2102
)
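// For reference, how the first four little-endian bytes of a stream map onto
// these magic numbers (mirroring the switch in InitR below; the helper name
// is illustrative, and b is expected to hold at least 4 bytes):
func classifyMagic(b []byte) string {
	switch m := binary.LittleEndian.Uint32(b); {
	case m == frameMagic:
		return "lz4 frame"
	case m == frameMagicLegacy:
		return "legacy lz4 frame"
	case m>>8 == frameSkipMagic>>8: // all 16 values 0x184D2A50..0x184D2A5F are skippable
		return "skippable frame"
	default:
		return "not an lz4 stream"
	}
}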
func NewFrame() *Frame {
return &Frame{}
}
type Frame struct {
buf [15]byte // frame descriptor needs at most 4(magic)+4+8+1=11 bytes
Magic uint32
Descriptor FrameDescriptor
Blocks Blocks
Checksum uint32
checksum xxh32.XXHZero
}
// Reset allows reusing the Frame.
// The Descriptor configuration is not modified.
func (f *Frame) Reset(num int) {
f.Magic = 0
f.Descriptor.Checksum = 0
f.Descriptor.ContentSize = 0
_ = f.Blocks.close(f, num)
f.Checksum = 0
}
func (f *Frame) InitW(dst io.Writer, num int, legacy bool) {
if legacy {
f.Magic = frameMagicLegacy
idx := lz4block.Index(lz4block.Block8Mb)
f.Descriptor.Flags.BlockSizeIndexSet(idx)
} else {
f.Magic = frameMagic
f.Descriptor.initW()
}
f.Blocks.initW(f, dst, num)
f.checksum.Reset()
}
func (f *Frame) CloseW(dst io.Writer, num int) error {
if err := f.Blocks.close(f, num); err != nil {
return err
}
if f.isLegacy() {
return nil
}
buf := f.buf[:0]
// End mark (data block size of uint32(0)).
buf = append(buf, 0, 0, 0, 0)
if f.Descriptor.Flags.ContentChecksum() {
buf = f.checksum.Sum(buf)
}
_, err := dst.Write(buf)
return err
}
func (f *Frame) isLegacy() bool {
return f.Magic == frameMagicLegacy
}
func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
if f.Magic > 0 {
// Header already read.
return nil, nil
}
newFrame:
var err error
if f.Magic, err = f.readUint32(src); err != nil {
return nil, err
}
switch m := f.Magic; {
case m == frameMagic || m == frameMagicLegacy:
// All 16 values of frameSkipMagic are valid.
case m>>8 == frameSkipMagic>>8:
skip, err := f.readUint32(src)
if err != nil {
return nil, err
}
if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
return nil, err
}
goto newFrame
default:
return nil, lz4errors.ErrInvalidFrame
}
if err := f.Descriptor.initR(f, src); err != nil {
return nil, err
}
f.checksum.Reset()
return f.Blocks.initR(f, num, src)
}
func (f *Frame) CloseR(src io.Reader) (err error) {
if f.isLegacy() {
return nil
}
if !f.Descriptor.Flags.ContentChecksum() {
return nil
}
if f.Checksum, err = f.readUint32(src); err != nil {
return err
}
if c := f.checksum.Sum32(); c != f.Checksum {
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, c, f.Checksum)
}
return nil
}
type FrameDescriptor struct {
Flags DescriptorFlags
ContentSize uint64
Checksum uint8
}
func (fd *FrameDescriptor) initW() {
fd.Flags.VersionSet(1)
fd.Flags.BlockIndependenceSet(true)
}
func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error {
if fd.Checksum > 0 {
// Header already written.
return nil
}
buf := f.buf[:4]
// Write the magic number here even though it belongs to the Frame.
binary.LittleEndian.PutUint32(buf, f.Magic)
if !f.isLegacy() {
buf = buf[:4+2]
binary.LittleEndian.PutUint16(buf[4:], uint16(fd.Flags))
if fd.Flags.Size() {
buf = buf[:4+2+8]
binary.LittleEndian.PutUint64(buf[4+2:], fd.ContentSize)
}
fd.Checksum = descriptorChecksum(buf[4:])
buf = append(buf, fd.Checksum)
}
_, err := dst.Write(buf)
return err
}
func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error {
if f.isLegacy() {
idx := lz4block.Index(lz4block.Block8Mb)
f.Descriptor.Flags.BlockSizeIndexSet(idx)
return nil
}
// Read the flags and the checksum, hoping that there is no content size.
buf := f.buf[:3]
if _, err := io.ReadFull(src, buf); err != nil {
return err
}
descr := binary.LittleEndian.Uint16(buf)
fd.Flags = DescriptorFlags(descr)
if fd.Flags.Size() {
// Append the 8 missing bytes.
buf = buf[:3+8]
if _, err := io.ReadFull(src, buf[3:]); err != nil {
return err
}
fd.ContentSize = binary.LittleEndian.Uint64(buf[2:])
}
fd.Checksum = buf[len(buf)-1] // the checksum is the last byte
buf = buf[:len(buf)-1] // all descriptor fields except checksum
if c := descriptorChecksum(buf); fd.Checksum != c {
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum)
}
// Validate the elements that can be.
if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() {
return lz4errors.ErrOptionInvalidBlockSize
}
return nil
}
func descriptorChecksum(buf []byte) byte {
return byte(xxh32.ChecksumZero(buf) >> 8)
}


@@ -0,0 +1,103 @@
// Code generated by `gen.exe`. DO NOT EDIT.
package lz4stream
import "github.com/pierrec/lz4/v4/internal/lz4block"
// DescriptorFlags is defined as follows:
// field bits
// ----- ----
// _ 2
// ContentChecksum 1
// Size 1
// BlockChecksum 1
// BlockIndependence 1
// Version 2
// _ 4
// BlockSizeIndex 3
// _ 1
type DescriptorFlags uint16
// Getters.
func (x DescriptorFlags) ContentChecksum() bool { return x>>2&1 != 0 }
func (x DescriptorFlags) Size() bool { return x>>3&1 != 0 }
func (x DescriptorFlags) BlockChecksum() bool { return x>>4&1 != 0 }
func (x DescriptorFlags) BlockIndependence() bool { return x>>5&1 != 0 }
func (x DescriptorFlags) Version() uint16 { return uint16(x >> 6 & 0x3) }
func (x DescriptorFlags) BlockSizeIndex() lz4block.BlockSizeIndex {
return lz4block.BlockSizeIndex(x >> 12 & 0x7)
}
// Setters.
func (x *DescriptorFlags) ContentChecksumSet(v bool) *DescriptorFlags {
const b = 1 << 2
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) SizeSet(v bool) *DescriptorFlags {
const b = 1 << 3
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) BlockChecksumSet(v bool) *DescriptorFlags {
const b = 1 << 4
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) BlockIndependenceSet(v bool) *DescriptorFlags {
const b = 1 << 5
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) VersionSet(v uint16) *DescriptorFlags {
*x = *x&^(0x3<<6) | (DescriptorFlags(v) & 0x3 << 6)
return x
}
func (x *DescriptorFlags) BlockSizeIndexSet(v lz4block.BlockSizeIndex) *DescriptorFlags {
*x = *x&^(0x7<<12) | (DescriptorFlags(v) & 0x7 << 12)
return x
}
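// A sketch of how these generated accessors compose; the resulting value
// follows directly from the bit layout documented above (the function is
// illustrative only):
func exampleDescriptorFlags() DescriptorFlags {
	var f DescriptorFlags
	f.VersionSet(1)                                        // bits 6-7
	f.BlockIndependenceSet(true)                           // bit 5
	f.BlockSizeIndexSet(lz4block.Index(lz4block.Block4Mb)) // bits 12-14 (index 7)
	// f is now 0x7060: version 1, independent blocks, 4 MB maximum block size.
	return f
}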
// Code generated by `gen.exe`. DO NOT EDIT.
// DataBlockSize is defined as follows:
// field bits
// ----- ----
// size 31
// Uncompressed 1
type DataBlockSize uint32
// Getters.
func (x DataBlockSize) size() int { return int(x & 0x7FFFFFFF) }
func (x DataBlockSize) Uncompressed() bool { return x>>31&1 != 0 }
// Setters.
func (x *DataBlockSize) sizeSet(v int) *DataBlockSize {
*x = *x&^0x7FFFFFFF | DataBlockSize(v)&0x7FFFFFFF
return x
}
func (x *DataBlockSize) UncompressedSet(v bool) *DataBlockSize {
const b = 1 << 31
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}


@@ -0,0 +1,212 @@
// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
// (https://github.com/Cyan4973/XXH/)
package xxh32
import (
"encoding/binary"
)
const (
prime1 uint32 = 2654435761
prime2 uint32 = 2246822519
prime3 uint32 = 3266489917
prime4 uint32 = 668265263
prime5 uint32 = 374761393
primeMask = 0xFFFFFFFF
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
)
// XXHZero represents an xxhash32 object with seed 0.
type XXHZero struct {
v [4]uint32
totalLen uint64
buf [16]byte
bufused int
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (xxh XXHZero) Sum(b []byte) []byte {
h32 := xxh.Sum32()
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
}
// Reset resets the Hash to its initial state.
func (xxh *XXHZero) Reset() {
xxh.v[0] = prime1plus2
xxh.v[1] = prime2
xxh.v[2] = 0
xxh.v[3] = prime1minus
xxh.totalLen = 0
xxh.bufused = 0
}
// Size returns the number of bytes returned by Sum().
func (xxh *XXHZero) Size() int {
return 4
}
// BlockSize gives the minimum number of bytes accepted by Write().
func (xxh *XXHZero) BlockSize() int {
return 1
}
// Write adds input bytes to the Hash.
// It never returns an error.
func (xxh *XXHZero) Write(input []byte) (int, error) {
if xxh.totalLen == 0 {
xxh.Reset()
}
n := len(input)
m := xxh.bufused
xxh.totalLen += uint64(n)
r := len(xxh.buf) - m
if n < r {
copy(xxh.buf[m:], input)
xxh.bufused += len(input)
return n, nil
}
var buf *[16]byte
if m != 0 {
// some data left from previous update
buf = &xxh.buf
c := copy(buf[m:], input)
n -= c
input = input[c:]
}
update(&xxh.v, buf, input)
xxh.bufused = copy(xxh.buf[:], input[n-n%16:])
return n, nil
}
// Portable version of update. This updates v by processing all of buf
// (if not nil) and all full 16-byte blocks of input.
func updateGo(v *[4]uint32, buf *[16]byte, input []byte) {
// Causes compiler to work directly from registers instead of stack:
v1, v2, v3, v4 := v[0], v[1], v[2], v[3]
if buf != nil {
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
}
for ; len(input) >= 16; input = input[16:] {
sub := input[:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
v[0], v[1], v[2], v[3] = v1, v2, v3, v4
}
// Sum32 returns the 32-bit hash value.
func (xxh *XXHZero) Sum32() uint32 {
h32 := uint32(xxh.totalLen)
if h32 >= 16 {
h32 += rol1(xxh.v[0]) + rol7(xxh.v[1]) + rol12(xxh.v[2]) + rol18(xxh.v[3])
} else {
h32 += prime5
}
p := 0
n := xxh.bufused
buf := xxh.buf
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for ; p < n; p++ {
h32 += uint32(buf[p]) * prime5
h32 = rol11(h32) * prime1
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
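// The streaming interface (Write then Sum32) and the one-shot ChecksumZero
// declared in this package's assembly/portable variants produce the same
// value. A sketch, as if written inside this package:
func checksumBothWays(data []byte) (oneShot, streamed uint32) {
	oneShot = ChecksumZero(data)
	var h XXHZero
	_, _ = h.Write(data[:len(data)/2]) // Write resets the state on first use
	_, _ = h.Write(data[len(data)/2:])
	streamed = h.Sum32()
	return // both results are identical for any input
}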
// Portable version of ChecksumZero.
func checksumZeroGo(input []byte) uint32 {
n := len(input)
h32 := uint32(n)
if n < 16 {
h32 += prime5
} else {
v1 := prime1plus2
v2 := prime2
v3 := uint32(0)
v4 := prime1minus
p := 0
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
input = input[p:]
n -= p
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
}
p := 0
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for p < n {
h32 += uint32(input[p]) * prime5
h32 = rol11(h32) * prime1
p++
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
func rol1(u uint32) uint32 {
return u<<1 | u>>31
}
func rol7(u uint32) uint32 {
return u<<7 | u>>25
}
func rol11(u uint32) uint32 {
return u<<11 | u>>21
}
func rol12(u uint32) uint32 {
return u<<12 | u>>20
}
func rol13(u uint32) uint32 {
return u<<13 | u>>19
}
func rol17(u uint32) uint32 {
return u<<17 | u>>15
}
func rol18(u uint32) uint32 {
return u<<18 | u>>14
}


@@ -0,0 +1,11 @@
// +build !noasm
package xxh32
// ChecksumZero returns the 32-bit hash of input.
//
//go:noescape
func ChecksumZero(input []byte) uint32
//go:noescape
func update(v *[4]uint32, buf *[16]byte, input []byte)


@@ -0,0 +1,259 @@
// +build !noasm
#include "textflag.h"
#define prime1 $2654435761
#define prime2 $2246822519
#define prime3 $3266489917
#define prime4 $668265263
#define prime5 $374761393
#define prime1plus2 $606290984
#define prime1minus $1640531535
// Register allocation.
#define p R0
#define n R1
#define h R2
#define v1 R2 // Alias for h.
#define v2 R3
#define v3 R4
#define v4 R5
#define x1 R6
#define x2 R7
#define x3 R8
#define x4 R9
// We need the primes in registers. The 16-byte loop only uses prime{1,2}.
#define prime1r R11
#define prime2r R12
#define prime3r R3 // The rest can alias v{2-4}.
#define prime4r R4
#define prime5r R5
// Update round macros. These read from and increment p.
#define round16aligned \
MOVM.IA.W (p), [x1, x2, x3, x4] \
\
MULA x1, prime2r, v1, v1 \
MULA x2, prime2r, v2, v2 \
MULA x3, prime2r, v3, v3 \
MULA x4, prime2r, v4, v4 \
\
MOVW v1 @> 19, v1 \
MOVW v2 @> 19, v2 \
MOVW v3 @> 19, v3 \
MOVW v4 @> 19, v4 \
\
MUL prime1r, v1 \
MUL prime1r, v2 \
MUL prime1r, v3 \
MUL prime1r, v4 \
#define round16unaligned \
MOVBU.P 16(p), x1 \
MOVBU -15(p), x2 \
ORR x2 << 8, x1 \
MOVBU -14(p), x3 \
MOVBU -13(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v1, v1 \
MOVW v1 @> 19, v1 \
MUL prime1r, v1 \
\
MOVBU -12(p), x1 \
MOVBU -11(p), x2 \
ORR x2 << 8, x1 \
MOVBU -10(p), x3 \
MOVBU -9(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v2, v2 \
MOVW v2 @> 19, v2 \
MUL prime1r, v2 \
\
MOVBU -8(p), x1 \
MOVBU -7(p), x2 \
ORR x2 << 8, x1 \
MOVBU -6(p), x3 \
MOVBU -5(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v3, v3 \
MOVW v3 @> 19, v3 \
MUL prime1r, v3 \
\
MOVBU -4(p), x1 \
MOVBU -3(p), x2 \
ORR x2 << 8, x1 \
MOVBU -2(p), x3 \
MOVBU -1(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v4, v4 \
MOVW v4 @> 19, v4 \
MUL prime1r, v4 \
// func ChecksumZero([]byte) uint32
TEXT ·ChecksumZero(SB), NOFRAME|NOSPLIT, $-4-16
MOVW input_base+0(FP), p
MOVW input_len+4(FP), n
MOVW prime1, prime1r
MOVW prime2, prime2r
// Set up h for n < 16. It's tempting to say {ADD prime5, n, h}
// here, but that's a pseudo-op that generates a load through R11.
MOVW prime5, prime5r
ADD prime5r, n, h
CMP $0, n
BEQ end
// We let n go negative so we can do comparisons with SUB.S
// instead of separate CMP.
SUB.S $16, n
BMI loop16done
MOVW prime1plus2, v1
MOVW prime2, v2
MOVW $0, v3
MOVW prime1minus, v4
TST $3, p
BNE loop16unaligned
loop16aligned:
SUB.S $16, n
round16aligned
BPL loop16aligned
B loop16finish
loop16unaligned:
SUB.S $16, n
round16unaligned
BPL loop16unaligned
loop16finish:
MOVW v1 @> 31, h
ADD v2 @> 25, h
ADD v3 @> 20, h
ADD v4 @> 14, h
// h += len(input) with v2 as temporary.
MOVW input_len+4(FP), v2
ADD v2, h
loop16done:
ADD $16, n // Restore number of bytes left.
SUB.S $4, n
MOVW prime3, prime3r
BMI loop4done
MOVW prime4, prime4r
TST $3, p
BNE loop4unaligned
loop4aligned:
SUB.S $4, n
MOVW.P 4(p), x1
MULA prime3r, x1, h, h
MOVW h @> 15, h
MUL prime4r, h
BPL loop4aligned
B loop4done
loop4unaligned:
SUB.S $4, n
MOVBU.P 4(p), x1
MOVBU -3(p), x2
ORR x2 << 8, x1
MOVBU -2(p), x3
ORR x3 << 16, x1
MOVBU -1(p), x4
ORR x4 << 24, x1
MULA prime3r, x1, h, h
MOVW h @> 15, h
MUL prime4r, h
BPL loop4unaligned
loop4done:
ADD.S $4, n // Restore number of bytes left.
BEQ end
MOVW prime5, prime5r
loop1:
SUB.S $1, n
MOVBU.P 1(p), x1
MULA prime5r, x1, h, h
MOVW h @> 21, h
MUL prime1r, h
BNE loop1
end:
MOVW prime3, prime3r
EOR h >> 15, h
MUL prime2r, h
EOR h >> 13, h
MUL prime3r, h
EOR h >> 16, h
MOVW h, ret+12(FP)
RET
// func update(v *[4]uint32, buf *[16]byte, p []byte)
TEXT ·update(SB), NOFRAME|NOSPLIT, $-4-20
MOVW v+0(FP), p
MOVM.IA (p), [v1, v2, v3, v4]
MOVW prime1, prime1r
MOVW prime2, prime2r
// Process buf, if not nil.
MOVW buf+4(FP), p
CMP $0, p
BEQ noBuffered
round16aligned
noBuffered:
MOVW input_base +8(FP), p
MOVW input_len +12(FP), n
SUB.S $16, n
BMI end
TST $3, p
BNE loop16unaligned
loop16aligned:
SUB.S $16, n
round16aligned
BPL loop16aligned
B end
loop16unaligned:
SUB.S $16, n
round16unaligned
BPL loop16unaligned
end:
MOVW v+0(FP), p
MOVM.IA [v1, v2, v3, v4], (p)
RET


@@ -0,0 +1,10 @@
// +build !arm noasm
package xxh32
// ChecksumZero returns the 32-bit hash of input.
func ChecksumZero(input []byte) uint32 { return checksumZeroGo(input) }
func update(v *[4]uint32, buf *[16]byte, input []byte) {
updateGo(v, buf, input)
}