aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn256/cloudflare/gfp_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn256/cloudflare/gfp_amd64.s')
-rw-r--r--crypto/bn256/cloudflare/gfp_amd64.s97
1 files changed, 97 insertions, 0 deletions
diff --git a/crypto/bn256/cloudflare/gfp_amd64.s b/crypto/bn256/cloudflare/gfp_amd64.s
new file mode 100644
index 000000000..2d0176f2e
--- /dev/null
+++ b/crypto/bn256/cloudflare/gfp_amd64.s
@@ -0,0 +1,97 @@
+// +build amd64,!appengine,!gccgo
+
+#include "gfp.h"
+#include "mul.h"
+#include "mul_bmi2.h"
+
+TEXT ·gfpNeg(SB),0,$0-16
+ MOVQ ·p2+0(SB), R8
+ MOVQ ·p2+8(SB), R9
+ MOVQ ·p2+16(SB), R10
+ MOVQ ·p2+24(SB), R11
+
+ MOVQ a+8(FP), DI
+ SUBQ 0(DI), R8
+ SBBQ 8(DI), R9
+ SBBQ 16(DI), R10
+ SBBQ 24(DI), R11
+
+ MOVQ $0, AX
+ gfpCarry(R8,R9,R10,R11,AX, R12,R13,R14,R15,BX)
+
+ MOVQ c+0(FP), DI
+ storeBlock(R8,R9,R10,R11, 0(DI))
+ RET
+
+TEXT ·gfpAdd(SB),0,$0-24
+ MOVQ a+8(FP), DI
+ MOVQ b+16(FP), SI
+
+ loadBlock(0(DI), R8,R9,R10,R11)
+ MOVQ $0, R12
+
+ ADDQ 0(SI), R8
+ ADCQ 8(SI), R9
+ ADCQ 16(SI), R10
+ ADCQ 24(SI), R11
+ ADCQ $0, R12
+
+ gfpCarry(R8,R9,R10,R11,R12, R13,R14,R15,AX,BX)
+
+ MOVQ c+0(FP), DI
+ storeBlock(R8,R9,R10,R11, 0(DI))
+ RET
+
+TEXT ·gfpSub(SB),0,$0-24
+ MOVQ a+8(FP), DI
+ MOVQ b+16(FP), SI
+
+ loadBlock(0(DI), R8,R9,R10,R11)
+
+ MOVQ ·p2+0(SB), R12
+ MOVQ ·p2+8(SB), R13
+ MOVQ ·p2+16(SB), R14
+ MOVQ ·p2+24(SB), R15
+ MOVQ $0, AX
+
+ SUBQ 0(SI), R8
+ SBBQ 8(SI), R9
+ SBBQ 16(SI), R10
+ SBBQ 24(SI), R11
+
+ CMOVQCC AX, R12
+ CMOVQCC AX, R13
+ CMOVQCC AX, R14
+ CMOVQCC AX, R15
+
+ ADDQ R12, R8
+ ADCQ R13, R9
+ ADCQ R14, R10
+ ADCQ R15, R11
+
+ MOVQ c+0(FP), DI
+ storeBlock(R8,R9,R10,R11, 0(DI))
+ RET
+
+TEXT ·gfpMul(SB),0,$160-24
+ MOVQ a+8(FP), DI
+ MOVQ b+16(FP), SI
+
+ // Jump to a slightly different implementation if MULX isn't supported.
+ CMPB runtime·support_bmi2(SB), $0
+ JE nobmi2Mul
+
+ mulBMI2(0(DI),8(DI),16(DI),24(DI), 0(SI))
+ storeBlock( R8, R9,R10,R11, 0(SP))
+ storeBlock(R12,R13,R14,R15, 32(SP))
+ gfpReduceBMI2()
+ JMP end
+
+nobmi2Mul:
+ mul(0(DI),8(DI),16(DI),24(DI), 0(SI), 0(SP))
+ gfpReduce(0(SP))
+
+end:
+ MOVQ c+0(FP), DI
+ storeBlock(R12,R13,R14,R15, 0(DI))
+ RET