forked from Shiloh/githaven
12a1f914f4
* update github.com/alecthomas/chroma v0.8.0 -> v0.8.1 * github.com/blevesearch/bleve v1.0.10 -> v1.0.12 * editorconfig-core-go v2.1.1 -> v2.3.7 * github.com/gliderlabs/ssh v0.2.2 -> v0.3.1 * migrate editorconfig.ParseBytes to Parse * github.com/shurcooL/vfsgen to 0d455de96546 * github.com/go-git/go-git/v5 v5.1.0 -> v5.2.0 * github.com/google/uuid v1.1.1 -> v1.1.2 * github.com/huandu/xstrings v1.3.0 -> v1.3.2 * github.com/klauspost/compress v1.10.11 -> v1.11.1 * github.com/markbates/goth v1.61.2 -> v1.65.0 * github.com/mattn/go-sqlite3 v1.14.0 -> v1.14.4 * github.com/mholt/archiver v3.3.0 -> v3.3.2 * github.com/microcosm-cc/bluemonday 4f7140c49acb -> v1.0.4 * github.com/minio/minio-go v7.0.4 -> v7.0.5 * github.com/olivere/elastic v7.0.9 -> v7.0.20 * github.com/urfave/cli v1.20.0 -> v1.22.4 * github.com/prometheus/client_golang v1.1.0 -> v1.8.0 * github.com/xanzy/go-gitlab v0.37.0 -> v0.38.1 * mvdan.cc/xurls v2.1.0 -> v2.2.0 Co-authored-by: Lauris BH <lauris@nix.lv>
216 lines
3.5 KiB
ArmAsm
Vendored
216 lines
3.5 KiB
ArmAsm
Vendored
// +build !appengine
|
|
// +build gc
|
|
// +build !purego
|
|
|
|
#include "textflag.h"
|
|
|
|
// Register allocation:
|
|
// AX h
|
|
// CX pointer to advance through b
|
|
// DX n
|
|
// BX loop end
|
|
// R8 v1, k1
|
|
// R9 v2
|
|
// R10 v3
|
|
// R11 v4
|
|
// R12 tmp
|
|
// R13 prime1v
|
|
// R14 prime2v
|
|
// R15 prime4v
|
|
|
|
// round reads from and advances the buffer pointer in CX.
|
|
// It assumes that R13 has prime1v and R14 has prime2v.
|
|
#define round(r) \
|
|
MOVQ (CX), R12 \
|
|
ADDQ $8, CX \
|
|
IMULQ R14, R12 \
|
|
ADDQ R12, r \
|
|
ROLQ $31, r \
|
|
IMULQ R13, r
|
|
|
|
// mergeRound applies a merge round on the two registers acc and val.
|
|
// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
|
|
#define mergeRound(acc, val) \
|
|
IMULQ R14, val \
|
|
ROLQ $31, val \
|
|
IMULQ R13, val \
|
|
XORQ val, acc \
|
|
IMULQ R13, acc \
|
|
ADDQ R15, acc
|
|
|
|
// func Sum64(b []byte) uint64
|
|
TEXT ·Sum64(SB), NOSPLIT, $0-32
|
|
// Load fixed primes.
|
|
MOVQ ·prime1v(SB), R13
|
|
MOVQ ·prime2v(SB), R14
|
|
MOVQ ·prime4v(SB), R15
|
|
|
|
// Load slice.
|
|
MOVQ b_base+0(FP), CX
|
|
MOVQ b_len+8(FP), DX
|
|
LEAQ (CX)(DX*1), BX
|
|
|
|
// The first loop limit will be len(b)-32.
|
|
SUBQ $32, BX
|
|
|
|
// Check whether we have at least one block.
|
|
CMPQ DX, $32
|
|
JLT noBlocks
|
|
|
|
// Set up initial state (v1, v2, v3, v4).
|
|
MOVQ R13, R8
|
|
ADDQ R14, R8
|
|
MOVQ R14, R9
|
|
XORQ R10, R10
|
|
XORQ R11, R11
|
|
SUBQ R13, R11
|
|
|
|
// Loop until CX > BX.
|
|
blockLoop:
|
|
round(R8)
|
|
round(R9)
|
|
round(R10)
|
|
round(R11)
|
|
|
|
CMPQ CX, BX
|
|
JLE blockLoop
|
|
|
|
MOVQ R8, AX
|
|
ROLQ $1, AX
|
|
MOVQ R9, R12
|
|
ROLQ $7, R12
|
|
ADDQ R12, AX
|
|
MOVQ R10, R12
|
|
ROLQ $12, R12
|
|
ADDQ R12, AX
|
|
MOVQ R11, R12
|
|
ROLQ $18, R12
|
|
ADDQ R12, AX
|
|
|
|
mergeRound(AX, R8)
|
|
mergeRound(AX, R9)
|
|
mergeRound(AX, R10)
|
|
mergeRound(AX, R11)
|
|
|
|
JMP afterBlocks
|
|
|
|
noBlocks:
|
|
MOVQ ·prime5v(SB), AX
|
|
|
|
afterBlocks:
|
|
ADDQ DX, AX
|
|
|
|
// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
|
|
ADDQ $24, BX
|
|
|
|
CMPQ CX, BX
|
|
JG fourByte
|
|
|
|
wordLoop:
|
|
// Calculate k1.
|
|
MOVQ (CX), R8
|
|
ADDQ $8, CX
|
|
IMULQ R14, R8
|
|
ROLQ $31, R8
|
|
IMULQ R13, R8
|
|
|
|
XORQ R8, AX
|
|
ROLQ $27, AX
|
|
IMULQ R13, AX
|
|
ADDQ R15, AX
|
|
|
|
CMPQ CX, BX
|
|
JLE wordLoop
|
|
|
|
fourByte:
|
|
ADDQ $4, BX
|
|
CMPQ CX, BX
|
|
JG singles
|
|
|
|
MOVL (CX), R8
|
|
ADDQ $4, CX
|
|
IMULQ R13, R8
|
|
XORQ R8, AX
|
|
|
|
ROLQ $23, AX
|
|
IMULQ R14, AX
|
|
ADDQ ·prime3v(SB), AX
|
|
|
|
singles:
|
|
ADDQ $4, BX
|
|
CMPQ CX, BX
|
|
JGE finalize
|
|
|
|
singlesLoop:
|
|
MOVBQZX (CX), R12
|
|
ADDQ $1, CX
|
|
IMULQ ·prime5v(SB), R12
|
|
XORQ R12, AX
|
|
|
|
ROLQ $11, AX
|
|
IMULQ R13, AX
|
|
|
|
CMPQ CX, BX
|
|
JL singlesLoop
|
|
|
|
finalize:
|
|
MOVQ AX, R12
|
|
SHRQ $33, R12
|
|
XORQ R12, AX
|
|
IMULQ R14, AX
|
|
MOVQ AX, R12
|
|
SHRQ $29, R12
|
|
XORQ R12, AX
|
|
IMULQ ·prime3v(SB), AX
|
|
MOVQ AX, R12
|
|
SHRQ $32, R12
|
|
XORQ R12, AX
|
|
|
|
MOVQ AX, ret+24(FP)
|
|
RET
|
|
|
|
// writeBlocks uses the same registers as above except that it uses AX to store
|
|
// the d pointer.
|
|
|
|
// func writeBlocks(d *Digest, b []byte) int
|
|
TEXT ·writeBlocks(SB), NOSPLIT, $0-40
|
|
// Load fixed primes needed for round.
|
|
MOVQ ·prime1v(SB), R13
|
|
MOVQ ·prime2v(SB), R14
|
|
|
|
// Load slice.
|
|
MOVQ b_base+8(FP), CX
|
|
MOVQ b_len+16(FP), DX
|
|
LEAQ (CX)(DX*1), BX
|
|
SUBQ $32, BX
|
|
|
|
// Load vN from d.
|
|
MOVQ d+0(FP), AX
|
|
MOVQ 0(AX), R8 // v1
|
|
MOVQ 8(AX), R9 // v2
|
|
MOVQ 16(AX), R10 // v3
|
|
MOVQ 24(AX), R11 // v4
|
|
|
|
// We don't need to check the loop condition here; this function is
|
|
// always called with at least one block of data to process.
|
|
blockLoop:
|
|
round(R8)
|
|
round(R9)
|
|
round(R10)
|
|
round(R11)
|
|
|
|
CMPQ CX, BX
|
|
JLE blockLoop
|
|
|
|
// Copy vN back to d.
|
|
MOVQ R8, 0(AX)
|
|
MOVQ R9, 8(AX)
|
|
MOVQ R10, 16(AX)
|
|
MOVQ R11, 24(AX)
|
|
|
|
// The number of bytes written is CX minus the old base pointer.
|
|
SUBQ b_base+8(FP), CX
|
|
MOVQ CX, ret+32(FP)
|
|
|
|
RET
|