From bd6879ac518431174a490ba42f7e6e822dcb3ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 5 Mar 2018 14:33:45 +0200 Subject: core/vm, crypto/bn256: switch over to cloudflare library (#16203) * core/vm, crypto/bn256: switch over to cloudflare library * crypto/bn256: unmarshal constraint + start pure go impl * crypto/bn256: combo cloudflare and google lib * travis: drop 386 test job --- crypto/bn256/cloudflare/mul_bmi2.h | 112 +++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 crypto/bn256/cloudflare/mul_bmi2.h (limited to 'crypto/bn256/cloudflare/mul_bmi2.h') diff --git a/crypto/bn256/cloudflare/mul_bmi2.h b/crypto/bn256/cloudflare/mul_bmi2.h new file mode 100644 index 000000000..71ad0499a --- /dev/null +++ b/crypto/bn256/cloudflare/mul_bmi2.h @@ -0,0 +1,112 @@ +#define mulBMI2(a0,a1,a2,a3, rb) \ + MOVQ a0, DX \ + MOVQ $0, R13 \ + MULXQ 0+rb, R8, R9 \ + MULXQ 8+rb, AX, R10 \ + ADDQ AX, R9 \ + MULXQ 16+rb, AX, R11 \ + ADCQ AX, R10 \ + MULXQ 24+rb, AX, R12 \ + ADCQ AX, R11 \ + ADCQ $0, R12 \ + ADCQ $0, R13 \ + \ + MOVQ a1, DX \ + MOVQ $0, R14 \ + MULXQ 0+rb, AX, BX \ + ADDQ AX, R9 \ + ADCQ BX, R10 \ + MULXQ 16+rb, AX, BX \ + ADCQ AX, R11 \ + ADCQ BX, R12 \ + ADCQ $0, R13 \ + MULXQ 8+rb, AX, BX \ + ADDQ AX, R10 \ + ADCQ BX, R11 \ + MULXQ 24+rb, AX, BX \ + ADCQ AX, R12 \ + ADCQ BX, R13 \ + ADCQ $0, R14 \ + \ + MOVQ a2, DX \ + MOVQ $0, R15 \ + MULXQ 0+rb, AX, BX \ + ADDQ AX, R10 \ + ADCQ BX, R11 \ + MULXQ 16+rb, AX, BX \ + ADCQ AX, R12 \ + ADCQ BX, R13 \ + ADCQ $0, R14 \ + MULXQ 8+rb, AX, BX \ + ADDQ AX, R11 \ + ADCQ BX, R12 \ + MULXQ 24+rb, AX, BX \ + ADCQ AX, R13 \ + ADCQ BX, R14 \ + ADCQ $0, R15 \ + \ + MOVQ a3, DX \ + MULXQ 0+rb, AX, BX \ + ADDQ AX, R11 \ + ADCQ BX, R12 \ + MULXQ 16+rb, AX, BX \ + ADCQ AX, R13 \ + ADCQ BX, R14 \ + ADCQ $0, R15 \ + MULXQ 8+rb, AX, BX \ + ADDQ AX, R12 \ + ADCQ BX, R13 \ + MULXQ 24+rb, AX, BX \ + ADCQ AX, R14 \ + ADCQ BX, R15 + +#define gfpReduceBMI2() \ + \ // m = (T * N') mod R, store m in R8:R9:R10:R11 + MOVQ ·np+0(SB), DX \ + MULXQ 0(SP), R8, R9 \ + MULXQ 8(SP), AX, R10 \ + ADDQ AX, R9 \ + MULXQ 16(SP), AX, R11 \ + ADCQ AX, R10 \ + MULXQ 24(SP), AX, BX \ + ADCQ AX, R11 \ + \ + MOVQ ·np+8(SB), DX \ + MULXQ 0(SP), AX, BX \ + ADDQ AX, R9 \ + ADCQ BX, R10 \ + MULXQ 16(SP), AX, BX \ + ADCQ AX, R11 \ + MULXQ 8(SP), AX, BX \ + ADDQ AX, R10 \ + ADCQ BX, R11 \ + \ + MOVQ ·np+16(SB), DX \ + MULXQ 0(SP), AX, BX \ + ADDQ AX, R10 \ + ADCQ BX, R11 \ + MULXQ 8(SP), AX, BX \ + ADDQ AX, R11 \ + \ + MOVQ ·np+24(SB), DX \ + MULXQ 0(SP), AX, BX \ + ADDQ AX, R11 \ + \ + storeBlock(R8,R9,R10,R11, 64(SP)) \ + \ + \ // m * N + mulBMI2(·p2+0(SB),·p2+8(SB),·p2+16(SB),·p2+24(SB), 64(SP)) \ + \ + \ // Add the 512-bit intermediate to m*N + MOVQ $0, AX \ + ADDQ 0(SP), R8 \ + ADCQ 8(SP), R9 \ + ADCQ 16(SP), R10 \ + ADCQ 24(SP), R11 \ + ADCQ 32(SP), R12 \ + ADCQ 40(SP), R13 \ + ADCQ 48(SP), R14 \ + ADCQ 56(SP), R15 \ + ADCQ $0, AX \ + \ + gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX) -- cgit v1.2.3