// +build gc
// +build !noasm

// Block decoder for 32-bit ARM, written in Go assembler syntax.
#include "textflag.h"
// Register allocation.
//
// The names below are used by decodeBlockNodict. R10 and R11 are not
// used; presumably because the Go toolchain reserves them on ARM (see
// the Go assembler guide), which is why tmp3 is R12.
#define dst	R0	// Write cursor in the output buffer.
#define dstorig	R1	// Start of the output buffer: lower bound for matches, base for the result.
#define src	R2	// Read cursor in the input buffer.
#define dstend	R3	// One past the last byte of the output buffer.
#define srcend	R4	// One past the last byte of the input buffer.
#define match	R5	// Match address.
#define token	R6	// Token byte of the current sequence.
#define len	R7	// Literal and match lengths.
#define offset	R6	// Match offset; overlaps with token.
#define tmp1	R8
#define tmp2	R9
#define tmp3	R12

// Added to the stored match length when bounds-checking a match.
// Expands to an immediate operand, so it is used as "ADD minMatch, reg".
#define minMatch	$4

// func decodeBlockNodict(dst, src []byte) int
//
// Decodes the compressed block in src into dst. The input is a series of
// sequences: a token byte (high nibble = literal length, low nibble =
// match length), optional length-extension bytes (each 255 means "more
// follows"), the literal bytes, a 2-byte little-endian match offset and
// optional match-length extension bytes. This appears to be the LZ4 block
// format.
//
// Returns the number of bytes written to dst. Returns -1 if src is empty
// or truncated, if a match offset is zero or points before the start of
// dst, or if dst is too small.
//
// Arguments and result live on the caller's frame (4-byte words):
// dst_base+0, dst_len+4, src_base+12, src_len+16, ret+24.
// Uses R0-R9 and R12 and the condition flags; see the register
// definitions at the top of the file.
TEXT ·decodeBlockNodict(SB), NOFRAME+NOSPLIT, $-4-28
	MOVW dst_base+0(FP), dst
	MOVW dst_len+4(FP), dstend
	MOVW src_base+12(FP), src
	MOVW src_len+16(FP), srcend

	// An empty input is reported as an error.
	CMP $0, srcend
	BEQ shortSrc

	// Turn the two lengths into end pointers.
	ADD dst, dstend
	ADD src, srcend

	// Keep the start of dst: it bounds match addresses from below and
	// is subtracted from dst at the end to obtain the result.
	MOVW dst, dstorig

loop:
	// Read token. Extract literal length.
	MOVBU.P 1(src), token
	MOVW    token >> 4, len
	CMP     $15, len
	BNE     readLitlenDone

readLitlenLoop:
	// A literal length of 15 is extended by following bytes; each byte
	// is added to len and a value of 255 means another byte follows.
	CMP     src, srcend
	BEQ     shortSrc
	MOVBU.P 1(src), tmp1
	ADD     tmp1, len
	CMP     $255, tmp1
	BEQ     readLitlenLoop

readLitlenDone:
	CMP $0, len
	BEQ copyLiteralDone

	// Bounds check dst+len and src+len.
	// len comes from the input and can be arbitrarily large, so the sums
	// may wrap around 2^32. The flag-setting adds leave the carry set on
	// wraparound; the second add is skipped if the first one carried.
	ADD.S    dst, len, tmp1
	ADD.CC.S src, len, tmp2
	BCS      shortSrc
	CMP      dstend, tmp1
	//BHI    shortDst	// Uncomment for distinct error codes.
	// Only compared if dst+len was in range; otherwise the HI result of
	// the previous comparison is kept for the branch below.
	CMP.LS   srcend, tmp2
	BHI      shortSrc

	// Copy literal.
	CMP $4, len
	BLO copyLiteralFinish

	// Copy 0-3 bytes until src is aligned.
	// None of the conditional instructions below set the flags, so each
	// group is governed by the TST that precedes it.
	TST        $1, src
	MOVBU.NE.P 1(src), tmp1
	MOVB.NE.P  tmp1, 1(dst)
	SUB.NE     $1, len

	// src is even here, so the halfword load is aligned.
	TST        $2, src
	MOVHU.NE.P 2(src), tmp2
	MOVB.NE.P  tmp2, 1(dst)
	MOVW.NE    tmp2 >> 8, tmp1
	MOVB.NE.P  tmp1, 1(dst)
	SUB.NE     $2, len

	B copyLiteralLoopCond

copyLiteralLoop:
	// Aligned load, unaligned write.
	// Bytes 1-3 are stored first; the final store writes byte 0 and
	// advances dst by four.
	MOVW.P 4(src), tmp1
	MOVW   tmp1 >> 8, tmp2
	MOVB   tmp2, 1(dst)
	MOVW   tmp1 >> 16, tmp3
	MOVB   tmp3, 2(dst)
	MOVW   tmp1 >> 24, tmp2
	MOVB   tmp2, 3(dst)
	MOVB.P tmp1, 4(dst)

copyLiteralLoopCond:
	// Loop until len-4 < 0.
	SUB.S $4, len
	BPL   copyLiteralLoop

	// Restore len, which is now negative.
	ADD $4, len

copyLiteralFinish:
	// Copy remaining 0-3 bytes.
	// src is only known to be aligned when we get here from the loop
	// above; literals shorter than four bytes branch here directly with
	// an arbitrary src. Use byte loads only, so that no unaligned
	// halfword load is issued on cores that do not support it.
	TST        $2, len
	MOVBU.NE.P 1(src), tmp1
	MOVB.NE.P  tmp1, 1(dst)
	MOVBU.NE.P 1(src), tmp2
	MOVB.NE.P  tmp2, 1(dst)
	TST        $1, len
	MOVBU.NE.P 1(src), tmp1
	MOVB.NE.P  tmp1, 1(dst)

copyLiteralDone:
	// Input that ends right after the literals is a normal end of block.
	CMP src, srcend
	BEQ end

	// Initial part of match length.
	// This frees up the token register for reuse as offset.
	AND $15, token, len

	// Read offset.
	// src is advanced first so that one comparison covers both bytes.
	ADD   $2, src
	CMP   srcend, src
	BHI   shortSrc
	MOVBU -2(src), offset
	MOVBU -1(src), tmp1
	ORR   tmp1 << 8, offset
	CMP   $0, offset
	BEQ   corrupt

	// Read rest of match length.
	CMP $15, len
	BNE readMatchlenDone

readMatchlenLoop:
	// Same extension scheme as for the literal length.
	CMP     src, srcend
	BEQ     shortSrc
	MOVBU.P 1(src), tmp1
	ADD     tmp1, len
	CMP     $255, tmp1
	BEQ     readMatchlenLoop

readMatchlenDone:
	// Bounds check dst+len+minMatch and match = dst-offset.
	// As with the literal check, a carry out of either addition means
	// the sum wrapped around and must be rejected.
	ADD.S    dst, len, tmp1
	ADD.CC.S minMatch, tmp1
	BCS      shortDst
	CMP      dstend, tmp1
	//BHI    shortDst	// Uncomment for distinct error codes.
	SUB      offset, dst, match
	// Only compared if the match fits in dst; HI here means the match
	// would start before the beginning of dst.
	CMP.LS   match, dstorig
	BHI      corrupt

	// Since len+minMatch is at least four, we can do a 4× unrolled
	// byte copy loop. Using MOVW instead of four byte loads is faster,
	// but to remain portable we'd have to align match first, which is
	// too expensive. By alternating loads and stores, we also handle
	// the case offset < 4.
copyMatch4:
	// The flags set here are consumed by the BPL at the bottom of the
	// loop; the moves in between leave them untouched.
	SUB.S   $4, len
	MOVBU.P 4(match), tmp1
	MOVB.P  tmp1, 4(dst)
	MOVBU   -3(match), tmp2
	MOVB    tmp2, -3(dst)
	MOVBU   -2(match), tmp3
	MOVB    tmp3, -2(dst)
	MOVBU   -1(match), tmp1
	MOVB    tmp1, -1(dst)
	BPL     copyMatch4

	// Restore len, which is now negative.
	ADD.S $4, len
	BEQ   copyMatchDone

copyMatch:
	// Finish with a byte-at-a-time copy.
	SUB.S   $1, len
	MOVBU.P 1(match), tmp2
	MOVB.P  tmp2, 1(dst)
	BNE     copyMatch

copyMatchDone:
	CMP src, srcend
	BNE loop

end:
	// Result: number of bytes written to dst.
	SUB  dstorig, dst, tmp1
	MOVW tmp1, ret+24(FP)
	RET

	// The three error cases have distinct labels so we can put different
	// return codes here when debugging, or if the error returns need to
	// be changed.
shortDst:
shortSrc:
corrupt:
	MOVW $-1, tmp1
	MOVW tmp1, ret+24(FP)
	RET