170 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			170 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| // Copyright 2012 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| // This code was translated into a form compatible with 6a from the public
 | |
| // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
 | |
| 
 | |
| // +build amd64,!gccgo,!appengine
 | |
| 
 | |
| #include "const_amd64.h"
 | |
| 
 | |
| // func mul(dest, a, b *[5]uint64): writes to dest the five-word product of a and b; partial products whose word index would be 5 or more are scaled by 19 and folded back into the low words.
 | |
| TEXT ·mul(SB),0,$16-24	// 16-byte local frame: 0(SP) holds 19*a[3], 8(SP) holds 19*a[4]; 24 bytes of arguments (three pointers)
 | |
| 	MOVQ dest+0(FP), DI	// DI = dest
 | |
| 	MOVQ a+8(FP), SI	// SI = a
 | |
| 	MOVQ b+16(FP), DX	// DX = b (moved to CX below, since MULQ overwrites DX)
 | |
| // Notation: a[i] / b[j] are the 64-bit words at byte offset 8*i / 8*j. r0..r4 are 128-bit accumulators kept as high:low pairs R9:R8, R11:R10, R13:R12, R15:R14, BP:BX. Each MULQ leaves AX*operand in DX:AX; the following ADDQ/ADCQ pair adds that into an accumulator.
 | |
| 	MOVQ DX,CX	// CX = b for the whole multiplication phase
 | |
| 	MOVQ 24(SI),DX
 | |
| 	IMUL3Q $19,DX,AX	// AX = 19*a[3]
 | |
| 	MOVQ AX,0(SP)	// spill 19*a[3] for reuse later
 | |
| 	MULQ 16(CX)	// DX:AX = 19*a[3]*b[2]
 | |
| 	MOVQ AX,R8	// r0 = 19*a[3]*b[2]
 | |
| 	MOVQ DX,R9
 | |
| 	MOVQ 32(SI),DX
 | |
| 	IMUL3Q $19,DX,AX	// AX = 19*a[4]
 | |
| 	MOVQ AX,8(SP)	// spill 19*a[4] for reuse later
 | |
| 	MULQ 8(CX)	// DX:AX = 19*a[4]*b[1]
 | |
| 	ADDQ AX,R8	// r0 += 19*a[4]*b[1]
 | |
| 	ADCQ DX,R9
 | |
| 	MOVQ 0(SI),AX
 | |
| 	MULQ 0(CX)	// DX:AX = a[0]*b[0]
 | |
| 	ADDQ AX,R8	// r0 += a[0]*b[0]
 | |
| 	ADCQ DX,R9
 | |
| 	MOVQ 0(SI),AX
 | |
| 	MULQ 8(CX)	// DX:AX = a[0]*b[1]
 | |
| 	MOVQ AX,R10	// r1 = a[0]*b[1]
 | |
| 	MOVQ DX,R11
 | |
| 	MOVQ 0(SI),AX
 | |
| 	MULQ 16(CX)	// DX:AX = a[0]*b[2]
 | |
| 	MOVQ AX,R12	// r2 = a[0]*b[2]
 | |
| 	MOVQ DX,R13
 | |
| 	MOVQ 0(SI),AX
 | |
| 	MULQ 24(CX)	// DX:AX = a[0]*b[3]
 | |
| 	MOVQ AX,R14	// r3 = a[0]*b[3]
 | |
| 	MOVQ DX,R15
 | |
| 	MOVQ 0(SI),AX
 | |
| 	MULQ 32(CX)	// DX:AX = a[0]*b[4]
 | |
| 	MOVQ AX,BX	// r4 = a[0]*b[4]
 | |
| 	MOVQ DX,BP	// BP serves as a general-purpose register (high word of r4) in this routine
 | |
| 	MOVQ 8(SI),AX
 | |
| 	MULQ 0(CX)	// DX:AX = a[1]*b[0]
 | |
| 	ADDQ AX,R10	// r1 += a[1]*b[0]
 | |
| 	ADCQ DX,R11
 | |
| 	MOVQ 8(SI),AX
 | |
| 	MULQ 8(CX)	// DX:AX = a[1]*b[1]
 | |
| 	ADDQ AX,R12	// r2 += a[1]*b[1]
 | |
| 	ADCQ DX,R13
 | |
| 	MOVQ 8(SI),AX
 | |
| 	MULQ 16(CX)	// DX:AX = a[1]*b[2]
 | |
| 	ADDQ AX,R14	// r3 += a[1]*b[2]
 | |
| 	ADCQ DX,R15
 | |
| 	MOVQ 8(SI),AX
 | |
| 	MULQ 24(CX)	// DX:AX = a[1]*b[3]
 | |
| 	ADDQ AX,BX	// r4 += a[1]*b[3]
 | |
| 	ADCQ DX,BP
 | |
| 	MOVQ 8(SI),DX
 | |
| 	IMUL3Q $19,DX,AX	// AX = 19*a[1]
 | |
| 	MULQ 32(CX)	// DX:AX = 19*a[1]*b[4] (index 1+4 wraps to word 0)
 | |
| 	ADDQ AX,R8	// r0 += 19*a[1]*b[4]
 | |
| 	ADCQ DX,R9
 | |
| 	MOVQ 16(SI),AX
 | |
| 	MULQ 0(CX)	// DX:AX = a[2]*b[0]
 | |
| 	ADDQ AX,R12	// r2 += a[2]*b[0]
 | |
| 	ADCQ DX,R13
 | |
| 	MOVQ 16(SI),AX
 | |
| 	MULQ 8(CX)	// DX:AX = a[2]*b[1]
 | |
| 	ADDQ AX,R14	// r3 += a[2]*b[1]
 | |
| 	ADCQ DX,R15
 | |
| 	MOVQ 16(SI),AX
 | |
| 	MULQ 16(CX)	// DX:AX = a[2]*b[2]
 | |
| 	ADDQ AX,BX	// r4 += a[2]*b[2]
 | |
| 	ADCQ DX,BP
 | |
| 	MOVQ 16(SI),DX
 | |
| 	IMUL3Q $19,DX,AX	// AX = 19*a[2]
 | |
| 	MULQ 24(CX)	// DX:AX = 19*a[2]*b[3] (wraps to word 0)
 | |
| 	ADDQ AX,R8	// r0 += 19*a[2]*b[3]
 | |
| 	ADCQ DX,R9
 | |
| 	MOVQ 16(SI),DX
 | |
| 	IMUL3Q $19,DX,AX	// AX = 19*a[2] (recomputed; MULQ overwrote AX)
 | |
| 	MULQ 32(CX)	// DX:AX = 19*a[2]*b[4] (wraps to word 1)
 | |
| 	ADDQ AX,R10	// r1 += 19*a[2]*b[4]
 | |
| 	ADCQ DX,R11
 | |
| 	MOVQ 24(SI),AX
 | |
| 	MULQ 0(CX)	// DX:AX = a[3]*b[0]
 | |
| 	ADDQ AX,R14	// r3 += a[3]*b[0]
 | |
| 	ADCQ DX,R15
 | |
| 	MOVQ 24(SI),AX
 | |
| 	MULQ 8(CX)	// DX:AX = a[3]*b[1]
 | |
| 	ADDQ AX,BX	// r4 += a[3]*b[1]
 | |
| 	ADCQ DX,BP
 | |
| 	MOVQ 0(SP),AX	// AX = 19*a[3] (spilled above)
 | |
| 	MULQ 24(CX)	// DX:AX = 19*a[3]*b[3] (wraps to word 1)
 | |
| 	ADDQ AX,R10	// r1 += 19*a[3]*b[3]
 | |
| 	ADCQ DX,R11
 | |
| 	MOVQ 0(SP),AX	// AX = 19*a[3]
 | |
| 	MULQ 32(CX)	// DX:AX = 19*a[3]*b[4] (wraps to word 2)
 | |
| 	ADDQ AX,R12	// r2 += 19*a[3]*b[4]
 | |
| 	ADCQ DX,R13
 | |
| 	MOVQ 32(SI),AX
 | |
| 	MULQ 0(CX)	// DX:AX = a[4]*b[0]
 | |
| 	ADDQ AX,BX	// r4 += a[4]*b[0]
 | |
| 	ADCQ DX,BP
 | |
| 	MOVQ 8(SP),AX	// AX = 19*a[4] (spilled above)
 | |
| 	MULQ 16(CX)	// DX:AX = 19*a[4]*b[2] (wraps to word 1)
 | |
| 	ADDQ AX,R10	// r1 += 19*a[4]*b[2]
 | |
| 	ADCQ DX,R11
 | |
| 	MOVQ 8(SP),AX	// AX = 19*a[4]
 | |
| 	MULQ 24(CX)	// DX:AX = 19*a[4]*b[3] (wraps to word 2)
 | |
| 	ADDQ AX,R12	// r2 += 19*a[4]*b[3]
 | |
| 	ADCQ DX,R13
 | |
| 	MOVQ 8(SP),AX	// AX = 19*a[4]
 | |
| 	MULQ 32(CX)	// DX:AX = 19*a[4]*b[4] (wraps to word 3)
 | |
| 	ADDQ AX,R14	// r3 += 19*a[4]*b[4]
 | |
| 	ADCQ DX,R15
 | |
| 	MOVQ $REDMASK51,SI	// SI = REDMASK51 from const_amd64.h (presumably 2^51-1, matching the 51-bit shifts below; confirm). The a pointer is no longer needed.
 | |
| 	SHLQ $13,R9:R8	// R9 = (R9<<13) | (R8>>51): the part of r0 from bit 51 upward
 | |
| 	ANDQ SI,R8	// R8 = low word of r0 under REDMASK51
 | |
| 	SHLQ $13,R11:R10	// R11 = part of r1 from bit 51 upward
 | |
| 	ANDQ SI,R10	// R10 = low word of r1 under REDMASK51
 | |
| 	ADDQ R9,R10	// word 1 += carry out of r0
 | |
| 	SHLQ $13,R13:R12	// R13 = part of r2 from bit 51 upward
 | |
| 	ANDQ SI,R12	// R12 = low word of r2 under REDMASK51
 | |
| 	ADDQ R11,R12	// word 2 += carry out of r1
 | |
| 	SHLQ $13,R15:R14	// R15 = part of r3 from bit 51 upward
 | |
| 	ANDQ SI,R14	// R14 = low word of r3 under REDMASK51
 | |
| 	ADDQ R13,R14	// word 3 += carry out of r2
 | |
| 	SHLQ $13,BP:BX	// BP = part of r4 from bit 51 upward
 | |
| 	ANDQ SI,BX	// BX = low word of r4 under REDMASK51
 | |
| 	ADDQ R15,BX	// word 4 += carry out of r3
 | |
| 	IMUL3Q $19,BP,DX	// carry out of r4 wraps around to word 0, scaled by 19
 | |
| 	ADDQ DX,R8	// word 0 += 19 * (carry out of r4)
 | |
| 	MOVQ R8,DX	// second pass: ripple the remaining carries through words 0..4 using DX
 | |
| 	SHRQ $51,DX	// DX = carry out of word 0
 | |
| 	ADDQ R10,DX	// DX = word 1 + carry
 | |
| 	MOVQ DX,CX	// CX = unmasked word 1 (the b pointer is no longer needed)
 | |
| 	SHRQ $51,DX	// DX = carry out of word 1
 | |
| 	ANDQ SI,R8	// apply REDMASK51 to word 0
 | |
| 	ADDQ R12,DX	// DX = word 2 + carry
 | |
| 	MOVQ DX,R9	// R9 = unmasked word 2
 | |
| 	SHRQ $51,DX	// DX = carry out of word 2
 | |
| 	ANDQ SI,CX	// apply REDMASK51 to word 1
 | |
| 	ADDQ R14,DX	// DX = word 3 + carry
 | |
| 	MOVQ DX,AX	// AX = unmasked word 3
 | |
| 	SHRQ $51,DX	// DX = carry out of word 3
 | |
| 	ANDQ SI,R9	// apply REDMASK51 to word 2
 | |
| 	ADDQ BX,DX	// DX = word 4 + carry
 | |
| 	MOVQ DX,R10	// R10 = unmasked word 4
 | |
| 	SHRQ $51,DX	// DX = carry out of word 4
 | |
| 	ANDQ SI,AX	// apply REDMASK51 to word 3
 | |
| 	IMUL3Q $19,DX,DX	// carry out of word 4 wraps to word 0, scaled by 19
 | |
| 	ADDQ DX,R8	// word 0 += wrapped carry; R8 is not masked again after this add
 | |
| 	ANDQ SI,R10	// apply REDMASK51 to word 4
 | |
| 	MOVQ R8,0(DI)	// dest[0]
 | |
| 	MOVQ CX,8(DI)	// dest[1]
 | |
| 	MOVQ R9,16(DI)	// dest[2]
 | |
| 	MOVQ AX,24(DI)	// dest[3]
 | |
| 	MOVQ R10,32(DI)	// dest[4]
 | |
| 	RET
 |