aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn256/cloudflare/mul_bmi2.h
blob: 71ad0499afd59702f95447b38795ccae10729389 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// mulBMI2(a0,a1,a2,a3, rb) computes the full 512-bit product of two 256-bit
// operands with the BMI2 MULX instruction (schoolbook, one row per limb of a).
//
// Inputs:
//   a0..a3  the four 64-bit limbs of the first operand, a0 least significant.
//           Each one is loaded into DX with MOVQ.
//   rb      memory operand for the second operand. Its limbs are read at
//           0+rb, 8+rb, 16+rb and 24+rb (called b0..b3 below, b0 least
//           significant), so rb must be of a form that accepts such a
//           constant prefix, e.g. 64(SP).
//
// Output:   R8 (least significant) through R15 (most significant).
// Clobbers: AX, BX, DX, R8-R15 and the flags.
//
// "MULXQ src, lo, hi" multiplies DX by src and writes the 128-bit result to
// hi:lo without modifying the flags. That is what allows a MULXQ to sit in the
// middle of an ADDQ/ADCQ carry chain; the instruction order must therefore be
// kept exactly as written.
//
// Rows 1..3 each use two carry chains: first the products with b0 and b2
// (four consecutive result limbs plus a carry), then the products with b1 and
// b3 (the same, shifted up by one limb).
#define mulBMI2(a0,a1,a2,a3, rb) \
    MOVQ a0, DX \ // row 0: R8..R12 = a0 * b
    MOVQ $0, R13 \ // R13 takes the carry out of R12 at the end of this row
    MULXQ 0+rb, R8, R9 \ // R9:R8 = a0*b0
    MULXQ 8+rb, AX, R10 \ // R10:AX = a0*b1
    ADDQ AX, R9 \ // start of the row-0 carry chain
    MULXQ 16+rb, AX, R11 \ // R11:AX = a0*b2 (flags untouched)
    ADCQ AX, R10 \
    MULXQ 24+rb, AX, R12 \ // R12:AX = a0*b3 (flags untouched)
    ADCQ AX, R11 \
    ADCQ $0, R12 \
    ADCQ $0, R13 \
    \ // row 1: add a1 * b into R9..R13, carry into R14
    MOVQ a1, DX \
    MOVQ $0, R14 \ // R14 takes the carry out of R13 at the end of this row
    MULXQ 0+rb, AX, BX \ // BX:AX = a1*b0 -> limbs R9, R10
    ADDQ AX, R9 \
    ADCQ BX, R10 \
    MULXQ 16+rb, AX, BX \ // BX:AX = a1*b2 -> limbs R11, R12 (same carry chain)
    ADCQ AX, R11 \
    ADCQ BX, R12 \
    ADCQ $0, R13 \
    MULXQ 8+rb, AX, BX \ // BX:AX = a1*b1 -> limbs R10, R11 (new carry chain)
    ADDQ AX, R10 \
    ADCQ BX, R11 \
    MULXQ 24+rb, AX, BX \ // BX:AX = a1*b3 -> limbs R12, R13
    ADCQ AX, R12 \
    ADCQ BX, R13 \
    ADCQ $0, R14 \
    \ // row 2: add a2 * b into R10..R14, carry into R15
    MOVQ a2, DX \
    MOVQ $0, R15 \ // R15 takes the carry out of R14 at the end of this row
    MULXQ 0+rb, AX, BX \ // BX:AX = a2*b0 -> limbs R10, R11
    ADDQ AX, R10 \
    ADCQ BX, R11 \
    MULXQ 16+rb, AX, BX \ // BX:AX = a2*b2 -> limbs R12, R13 (same carry chain)
    ADCQ AX, R12 \
    ADCQ BX, R13 \
    ADCQ $0, R14 \
    MULXQ 8+rb, AX, BX \ // BX:AX = a2*b1 -> limbs R11, R12 (new carry chain)
    ADDQ AX, R11 \
    ADCQ BX, R12 \
    MULXQ 24+rb, AX, BX \ // BX:AX = a2*b3 -> limbs R13, R14
    ADCQ AX, R13 \
    ADCQ BX, R14 \
    ADCQ $0, R15 \
    \ // row 3: add a3 * b into R11..R15; R15 is the top limb, no further carry is kept
    MOVQ a3, DX \
    MULXQ 0+rb, AX, BX \ // BX:AX = a3*b0 -> limbs R11, R12
    ADDQ AX, R11 \
    ADCQ BX, R12 \
    MULXQ 16+rb, AX, BX \ // BX:AX = a3*b2 -> limbs R13, R14 (same carry chain)
    ADCQ AX, R13 \
    ADCQ BX, R14 \
    ADCQ $0, R15 \
    MULXQ 8+rb, AX, BX \ // BX:AX = a3*b1 -> limbs R12, R13 (new carry chain)
    ADDQ AX, R12 \
    ADCQ BX, R13 \
    MULXQ 24+rb, AX, BX \ // BX:AX = a3*b3 -> limbs R14, R15
    ADCQ AX, R14 \
    ADCQ BX, R15

// gfpReduceBMI2() reduces the 512-bit value T held in the eight 64-bit limbs
// at 0(SP)..56(SP) (least significant first), using MULX-based multiplication.
//
// Steps, as written below:
//   1. m = low 256 bits of T * ·np, built in R8..R11. Only T's low four limbs
//      (T0..T3 at 0(SP)..24(SP)) and partial products that land in limbs 0..3
//      are used; everything above is dropped.
//   2. m is handed to storeBlock with destination 64(SP) (presumably stored at
//      64(SP)..88(SP) - storeBlock is defined elsewhere, confirm there).
//   3. mulBMI2 computes ·p2 * (value at 64(SP)) into R8..R15.
//   4. T is added to that product; the carry out of the 512-bit sum ends in AX.
//   5. gfpCarry receives R12..R15 and AX, with R8..R11 and BX as its second
//      argument group (presumably the final conditional correction using
//      those as scratch - gfpCarry is defined elsewhere, confirm there).
//
// Per the inline comments, ·np plays the role of N' and ·p2 the role of N, so
// this has the shape of a Montgomery reduction. NOTE(review): both symbols are
// defined outside this file - confirm.
//
// Stack: reads 0(SP)..56(SP); uses 64(SP)..95(SP) as scratch for m.
// Clobbers: AX, BX, DX, R8-R15 and the flags, plus whatever gfpCarry clobbers.
#define gfpReduceBMI2() \
    \ // m = (T * N') mod R, store m in R8:R9:R10:R11
    MOVQ ·np+0(SB), DX \ // row 0: np0 * T0..T3, keeping limbs 0..3 only
    MULXQ 0(SP), R8, R9 \ // R9:R8 = np0*T0
    MULXQ 8(SP), AX, R10 \ // R10:AX = np0*T1
    ADDQ AX, R9 \
    MULXQ 16(SP), AX, R11 \ // R11:AX = np0*T2 (MULXQ leaves the flags alone)
    ADCQ AX, R10 \
    MULXQ 24(SP), AX, BX \ // only the low half (AX) of np0*T3 is used; BX would be limb 4
    ADCQ AX, R11 \
    \ // row 1: np1 * T0..T2, added at limbs 1..3
    MOVQ ·np+8(SB), DX \
    MULXQ 0(SP), AX, BX \ // BX:AX = np1*T0 -> limbs R9, R10
    ADDQ AX, R9 \
    ADCQ BX, R10 \
    MULXQ 16(SP), AX, BX \ // low half of np1*T2 -> limb R11; high half unused
    ADCQ AX, R11 \
    MULXQ 8(SP), AX, BX \ // BX:AX = np1*T1 -> limbs R10, R11 (new carry chain)
    ADDQ AX, R10 \
    ADCQ BX, R11 \
    \ // row 2: np2 * T0..T1, added at limbs 2..3
    MOVQ ·np+16(SB), DX \
    MULXQ 0(SP), AX, BX \ // BX:AX = np2*T0 -> limbs R10, R11
    ADDQ AX, R10 \
    ADCQ BX, R11 \
    MULXQ 8(SP), AX, BX \ // low half of np2*T1 -> limb R11; high half unused
    ADDQ AX, R11 \
    \ // row 3: np3 * T0, low half added at limb 3
    MOVQ ·np+24(SB), DX \
    MULXQ 0(SP), AX, BX \ // high half (BX) unused
    ADDQ AX, R11 \
    \ // hand m (R8..R11) to storeBlock so mulBMI2 can read it from 64(SP)
    storeBlock(R8,R9,R10,R11, 64(SP)) \
    \
    \ // m * N: R8..R15 = ·p2 * m, with m read from 64(SP)
    mulBMI2(·p2+0(SB),·p2+8(SB),·p2+16(SB),·p2+24(SB), 64(SP)) \
    \
    \ // Add the 512-bit intermediate T to m*N; AX collects the final carry
    MOVQ $0, AX \
    ADDQ 0(SP), R8 \
    ADCQ 8(SP), R9 \
    ADCQ 16(SP), R10 \
    ADCQ 24(SP), R11 \
    ADCQ 32(SP), R12 \
    ADCQ 40(SP), R13 \
    ADCQ 48(SP), R14 \
    ADCQ 56(SP), R15 \
    ADCQ $0, AX \
    \ // upper half R12..R15 plus carry AX goes to gfpCarry (defined elsewhere)
    gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)