diff options
author | Péter Szilágyi <peterke@gmail.com> | 2018-03-20 00:13:54 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-20 00:13:54 +0800 |
commit | 1203c6a237cb87b78ec495772cecb178200499ce (patch) | |
tree | a51e6c3a24e43f265fc5c9b4f2bdb7ff7de6a8db /crypto/bn256/cloudflare | |
parent | 0965761a45562d609f6036963dbac84561174677 (diff) | |
download | dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar.gz dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar.bz2 dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar.lz dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar.xz dexon-1203c6a237cb87b78ec495772cecb178200499ce.tar.zst dexon-1203c6a237cb87b78ec495772cecb178200499ce.zip |
crypto/bn256: full switchover to cloudflare's code (#16301)
* crypto/bn256: full switchover to cloudflare's code
* crypto/bn256: only use cloudflare for optimized architectures
* crypto/bn256: upstream fallback for non-optimized code
* .travis, build: drop support for Go 1.8 (need type aliases)
* crypto/bn256/cloudflare: enable curve mul lattice optimization
Diffstat (limited to 'crypto/bn256/cloudflare')
-rw-r--r-- | crypto/bn256/cloudflare/bn256_test.go | 2 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/curve.go | 19 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/example_test.go | 2 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp.h | 32 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_amd64.go | 15 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_amd64.s | 42 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_arm64.s | 113 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_decl.go | 18 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_generic.go | 173 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_pure.go | 19 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/gfp_test.go | 2 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/lattice.go | 115 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/lattice_test.go | 29 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/main_test.go | 2 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/mul_amd64.h (renamed from crypto/bn256/cloudflare/mul.h) | 0 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/mul_arm64.h | 133 | ||||
-rw-r--r-- | crypto/bn256/cloudflare/mul_bmi2_amd64.h (renamed from crypto/bn256/cloudflare/mul_bmi2.h) | 0 |
17 files changed, 632 insertions, 84 deletions
diff --git a/crypto/bn256/cloudflare/bn256_test.go b/crypto/bn256/cloudflare/bn256_test.go index 369a3edaa..0c8016d86 100644 --- a/crypto/bn256/cloudflare/bn256_test.go +++ b/crypto/bn256/cloudflare/bn256_test.go @@ -1,5 +1,3 @@ -// +build amd64,!appengine,!gccgo - package bn256 import ( diff --git a/crypto/bn256/cloudflare/curve.go b/crypto/bn256/cloudflare/curve.go index b6aecc0a6..18e9b38f3 100644 --- a/crypto/bn256/cloudflare/curve.go +++ b/crypto/bn256/cloudflare/curve.go @@ -183,15 +183,24 @@ func (c *curvePoint) Double(a *curvePoint) { } func (c *curvePoint) Mul(a *curvePoint, scalar *big.Int) { - sum, t := &curvePoint{}, &curvePoint{} + precomp := [1 << 2]*curvePoint{nil, {}, {}, {}} + precomp[1].Set(a) + precomp[2].Set(a) + gfpMul(&precomp[2].x, &precomp[2].x, xiTo2PSquaredMinus2Over3) + precomp[3].Add(precomp[1], precomp[2]) + + multiScalar := curveLattice.Multi(scalar) + + sum := &curvePoint{} sum.SetInfinity() + t := &curvePoint{} - for i := scalar.BitLen(); i >= 0; i-- { + for i := len(multiScalar) - 1; i >= 0; i-- { t.Double(sum) - if scalar.Bit(i) != 0 { - sum.Add(t, a) - } else { + if multiScalar[i] == 0 { sum.Set(t) + } else { + sum.Add(t, precomp[multiScalar[i]]) } } c.Set(sum) diff --git a/crypto/bn256/cloudflare/example_test.go b/crypto/bn256/cloudflare/example_test.go index 2ee545c67..b2d19807a 100644 --- a/crypto/bn256/cloudflare/example_test.go +++ b/crypto/bn256/cloudflare/example_test.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64,!appengine,!gccgo - package bn256 import ( diff --git a/crypto/bn256/cloudflare/gfp.h b/crypto/bn256/cloudflare/gfp.h deleted file mode 100644 index 66f5a4d07..000000000 --- a/crypto/bn256/cloudflare/gfp.h +++ /dev/null @@ -1,32 +0,0 @@ -#define storeBlock(a0,a1,a2,a3, r) \ - MOVQ a0, 0+r \ - MOVQ a1, 8+r \ - MOVQ a2, 16+r \ - MOVQ a3, 24+r - -#define loadBlock(r, a0,a1,a2,a3) \ - MOVQ 0+r, a0 \ - MOVQ 8+r, a1 \ - MOVQ 16+r, a2 \ - MOVQ 24+r, a3 - -#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \ - \ // b = a-p - MOVQ a0, b0 \ - MOVQ a1, b1 \ - MOVQ a2, b2 \ - MOVQ a3, b3 \ - MOVQ a4, b4 \ - \ - SUBQ ·p2+0(SB), b0 \ - SBBQ ·p2+8(SB), b1 \ - SBBQ ·p2+16(SB), b2 \ - SBBQ ·p2+24(SB), b3 \ - SBBQ $0, b4 \ - \ - \ // if b is negative then return a - \ // else return b - CMOVQCC b0, a0 \ - CMOVQCC b1, a1 \ - CMOVQCC b2, a2 \ - CMOVQCC b3, a3 diff --git a/crypto/bn256/cloudflare/gfp_amd64.go b/crypto/bn256/cloudflare/gfp_amd64.go deleted file mode 100644 index ac4f1a9c6..000000000 --- a/crypto/bn256/cloudflare/gfp_amd64.go +++ /dev/null @@ -1,15 +0,0 @@ -// +build amd64,!appengine,!gccgo - -package bn256 - -// go:noescape -func gfpNeg(c, a *gfP) - -//go:noescape -func gfpAdd(c, a, b *gfP) - -//go:noescape -func gfpSub(c, a, b *gfP) - -//go:noescape -func gfpMul(c, a, b *gfP) diff --git a/crypto/bn256/cloudflare/gfp_amd64.s b/crypto/bn256/cloudflare/gfp_amd64.s index 2d0176f2e..3a785d200 100644 --- a/crypto/bn256/cloudflare/gfp_amd64.s +++ b/crypto/bn256/cloudflare/gfp_amd64.s @@ -1,8 +1,40 @@ -// +build amd64,!appengine,!gccgo - -#include "gfp.h" -#include "mul.h" -#include "mul_bmi2.h" +// +build amd64,!generic + +#define storeBlock(a0,a1,a2,a3, r) \ + MOVQ a0, 0+r \ + MOVQ a1, 8+r \ + MOVQ a2, 16+r \ + MOVQ a3, 24+r + +#define loadBlock(r, a0,a1,a2,a3) \ + MOVQ 0+r, a0 \ + MOVQ 8+r, a1 \ + MOVQ 16+r, a2 \ + MOVQ 24+r, a3 + +#define gfpCarry(a0,a1,a2,a3,a4, b0,b1,b2,b3,b4) \ + \ // b = a-p + MOVQ a0, b0 \ + MOVQ a1, b1 \ + MOVQ a2, b2 \ + MOVQ a3, b3 \ + MOVQ a4, b4 \ + \ + SUBQ ·p2+0(SB), b0 \ + SBBQ ·p2+8(SB), b1 \ + SBBQ ·p2+16(SB), b2 \ + SBBQ ·p2+24(SB), b3 \ + SBBQ $0, b4 \ + \ + \ // if b is negative then return a + \ // else return b + CMOVQCC b0, a0 \ + CMOVQCC b1, a1 \ + CMOVQCC b2, a2 \ + CMOVQCC b3, a3 + +#include "mul_amd64.h" +#include "mul_bmi2_amd64.h" TEXT ·gfpNeg(SB),0,$0-16 MOVQ ·p2+0(SB), R8 diff --git a/crypto/bn256/cloudflare/gfp_arm64.s b/crypto/bn256/cloudflare/gfp_arm64.s new file mode 100644 index 000000000..c65e80168 --- /dev/null +++ b/crypto/bn256/cloudflare/gfp_arm64.s @@ -0,0 +1,113 @@ +// +build arm64,!generic + +#define storeBlock(a0,a1,a2,a3, r) \ + MOVD a0, 0+r \ + MOVD a1, 8+r \ + MOVD a2, 16+r \ + MOVD a3, 24+r + +#define loadBlock(r, a0,a1,a2,a3) \ + MOVD 0+r, a0 \ + MOVD 8+r, a1 \ + MOVD 16+r, a2 \ + MOVD 24+r, a3 + +#define loadModulus(p0,p1,p2,p3) \ + MOVD ·p2+0(SB), p0 \ + MOVD ·p2+8(SB), p1 \ + MOVD ·p2+16(SB), p2 \ + MOVD ·p2+24(SB), p3 + +#include "mul_arm64.h" + +TEXT ·gfpNeg(SB),0,$0-16 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + loadModulus(R5,R6,R7,R8) + + SUBS R1, R5, R1 + SBCS R2, R6, R2 + SBCS R3, R7, R3 + SBCS R4, R8, R4 + + SUBS R5, R1, R5 + SBCS R6, R2, R6 + SBCS R7, R3, R7 + SBCS R8, R4, R8 + + CSEL CS, R5, R1, R1 + CSEL CS, R6, R2, R2 + CSEL CS, R7, R3, R3 + CSEL CS, R8, R4, R4 + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET + +TEXT ·gfpAdd(SB),0,$0-24 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + MOVD b+16(FP), R0 + loadBlock(0(R0), R5,R6,R7,R8) + loadModulus(R9,R10,R11,R12) + MOVD ZR, R0 + + ADDS R5, R1 + ADCS R6, R2 + ADCS R7, R3 + ADCS R8, R4 + ADCS ZR, R0 + + SUBS R9, R1, R5 + SBCS R10, R2, R6 + SBCS R11, R3, R7 + SBCS R12, R4, R8 + SBCS ZR, R0, R0 + + CSEL CS, R5, R1, R1 + CSEL CS, R6, R2, R2 + CSEL CS, R7, R3, R3 + CSEL CS, R8, R4, R4 + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET + +TEXT ·gfpSub(SB),0,$0-24 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + MOVD b+16(FP), R0 + loadBlock(0(R0), R5,R6,R7,R8) + loadModulus(R9,R10,R11,R12) + + SUBS R5, R1 + SBCS R6, R2 + SBCS R7, R3 + SBCS R8, R4 + + CSEL CS, ZR, R9, R9 + CSEL CS, ZR, R10, R10 + CSEL CS, ZR, R11, R11 + CSEL CS, ZR, R12, R12 + + ADDS R9, R1 + ADCS R10, R2 + ADCS R11, R3 + ADCS R12, R4 + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET + +TEXT ·gfpMul(SB),0,$0-24 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + MOVD b+16(FP), R0 + loadBlock(0(R0), R5,R6,R7,R8) + + mul(R9,R10,R11,R12,R13,R14,R15,R16) + gfpReduce() + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET diff --git a/crypto/bn256/cloudflare/gfp_decl.go b/crypto/bn256/cloudflare/gfp_decl.go new file mode 100644 index 000000000..6a8a4fddb --- /dev/null +++ b/crypto/bn256/cloudflare/gfp_decl.go @@ -0,0 +1,18 @@ +// +build amd64,!generic arm64,!generic + +package bn256 + +// This file contains forward declarations for the architecture-specific +// assembly implementations of these functions, provided that they exist. + +// go:noescape +func gfpNeg(c, a *gfP) + +//go:noescape +func gfpAdd(c, a, b *gfP) + +//go:noescape +func gfpSub(c, a, b *gfP) + +//go:noescape +func gfpMul(c, a, b *gfP) diff --git a/crypto/bn256/cloudflare/gfp_generic.go b/crypto/bn256/cloudflare/gfp_generic.go new file mode 100644 index 000000000..8e6be9596 --- /dev/null +++ b/crypto/bn256/cloudflare/gfp_generic.go @@ -0,0 +1,173 @@ +// +build !amd64,!arm64 generic + +package bn256 + +func gfpCarry(a *gfP, head uint64) { + b := &gfP{} + + var carry uint64 + for i, pi := range p2 { + ai := a[i] + bi := ai - pi - carry + b[i] = bi + carry = (pi&^ai | (pi|^ai)&bi) >> 63 + } + carry = carry &^ head + + // If b is negative, then return a. + // Else return b. + carry = -carry + ncarry := ^carry + for i := 0; i < 4; i++ { + a[i] = (a[i] & carry) | (b[i] & ncarry) + } +} + +func gfpNeg(c, a *gfP) { + var carry uint64 + for i, pi := range p2 { + ai := a[i] + ci := pi - ai - carry + c[i] = ci + carry = (ai&^pi | (ai|^pi)&ci) >> 63 + } + gfpCarry(c, 0) +} + +func gfpAdd(c, a, b *gfP) { + var carry uint64 + for i, ai := range a { + bi := b[i] + ci := ai + bi + carry + c[i] = ci + carry = (ai&bi | (ai|bi)&^ci) >> 63 + } + gfpCarry(c, carry) +} + +func gfpSub(c, a, b *gfP) { + t := &gfP{} + + var carry uint64 + for i, pi := range p2 { + bi := b[i] + ti := pi - bi - carry + t[i] = ti + carry = (bi&^pi | (bi|^pi)&ti) >> 63 + } + + carry = 0 + for i, ai := range a { + ti := t[i] + ci := ai + ti + carry + c[i] = ci + carry = (ai&ti | (ai|ti)&^ci) >> 63 + } + gfpCarry(c, carry) +} + +func mul(a, b [4]uint64) [8]uint64 { + const ( + mask16 uint64 = 0x0000ffff + mask32 uint64 = 0xffffffff + ) + + var buff [32]uint64 + for i, ai := range a { + a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48 + + for j, bj := range b { + b0, b2 := bj&mask32, bj>>32 + + off := 4 * (i + j) + buff[off+0] += a0 * b0 + buff[off+1] += a1 * b0 + buff[off+2] += a2*b0 + a0*b2 + buff[off+3] += a3*b0 + a1*b2 + buff[off+4] += a2 * b2 + buff[off+5] += a3 * b2 + } + } + + for i := uint(1); i < 4; i++ { + shift := 16 * i + + var head, carry uint64 + for j := uint(0); j < 8; j++ { + block := 4 * j + + xi := buff[block] + yi := (buff[block+i] << shift) + head + zi := xi + yi + carry + buff[block] = zi + carry = (xi&yi | (xi|yi)&^zi) >> 63 + + head = buff[block+i] >> (64 - shift) + } + } + + return [8]uint64{buff[0], buff[4], buff[8], buff[12], buff[16], buff[20], buff[24], buff[28]} +} + +func halfMul(a, b [4]uint64) [4]uint64 { + const ( + mask16 uint64 = 0x0000ffff + mask32 uint64 = 0xffffffff + ) + + var buff [18]uint64 + for i, ai := range a { + a0, a1, a2, a3 := ai&mask16, (ai>>16)&mask16, (ai>>32)&mask16, ai>>48 + + for j, bj := range b { + if i+j > 3 { + break + } + b0, b2 := bj&mask32, bj>>32 + + off := 4 * (i + j) + buff[off+0] += a0 * b0 + buff[off+1] += a1 * b0 + buff[off+2] += a2*b0 + a0*b2 + buff[off+3] += a3*b0 + a1*b2 + buff[off+4] += a2 * b2 + buff[off+5] += a3 * b2 + } + } + + for i := uint(1); i < 4; i++ { + shift := 16 * i + + var head, carry uint64 + for j := uint(0); j < 4; j++ { + block := 4 * j + + xi := buff[block] + yi := (buff[block+i] << shift) + head + zi := xi + yi + carry + buff[block] = zi + carry = (xi&yi | (xi|yi)&^zi) >> 63 + + head = buff[block+i] >> (64 - shift) + } + } + + return [4]uint64{buff[0], buff[4], buff[8], buff[12]} +} + +func gfpMul(c, a, b *gfP) { + T := mul(*a, *b) + m := halfMul([4]uint64{T[0], T[1], T[2], T[3]}, np) + t := mul([4]uint64{m[0], m[1], m[2], m[3]}, p2) + + var carry uint64 + for i, Ti := range T { + ti := t[i] + zi := Ti + ti + carry + T[i] = zi + carry = (Ti&ti | (Ti|ti)&^zi) >> 63 + } + + *c = gfP{T[4], T[5], T[6], T[7]} + gfpCarry(c, carry) +} diff --git a/crypto/bn256/cloudflare/gfp_pure.go b/crypto/bn256/cloudflare/gfp_pure.go deleted file mode 100644 index 8fa5d3053..000000000 --- a/crypto/bn256/cloudflare/gfp_pure.go +++ /dev/null @@ -1,19 +0,0 @@ -// +build !amd64 appengine gccgo - -package bn256 - -func gfpNeg(c, a *gfP) { - panic("unsupported architecture") -} - -func gfpAdd(c, a, b *gfP) { - panic("unsupported architecture") -} - -func gfpSub(c, a, b *gfP) { - panic("unsupported architecture") -} - -func gfpMul(c, a, b *gfP) { - panic("unsupported architecture") -} diff --git a/crypto/bn256/cloudflare/gfp_test.go b/crypto/bn256/cloudflare/gfp_test.go index aff5e0531..16ab2a841 100644 --- a/crypto/bn256/cloudflare/gfp_test.go +++ b/crypto/bn256/cloudflare/gfp_test.go @@ -1,5 +1,3 @@ -// +build amd64,!appengine,!gccgo - package bn256 import ( diff --git a/crypto/bn256/cloudflare/lattice.go b/crypto/bn256/cloudflare/lattice.go new file mode 100644 index 000000000..f9ace4d9f --- /dev/null +++ b/crypto/bn256/cloudflare/lattice.go @@ -0,0 +1,115 @@ +package bn256 + +import ( + "math/big" +) + +var half = new(big.Int).Rsh(Order, 1) + +var curveLattice = &lattice{ + vectors: [][]*big.Int{ + {bigFromBase10("147946756881789319000765030803803410728"), bigFromBase10("147946756881789319010696353538189108491")}, + {bigFromBase10("147946756881789319020627676272574806254"), bigFromBase10("-147946756881789318990833708069417712965")}, + }, + inverse: []*big.Int{ + bigFromBase10("147946756881789318990833708069417712965"), + bigFromBase10("147946756881789319010696353538189108491"), + }, + det: bigFromBase10("43776485743678550444492811490514550177096728800832068687396408373151616991234"), +} + +var targetLattice = &lattice{ + vectors: [][]*big.Int{ + {bigFromBase10("9931322734385697761"), bigFromBase10("9931322734385697761"), bigFromBase10("9931322734385697763"), bigFromBase10("9931322734385697764")}, + {bigFromBase10("4965661367192848881"), bigFromBase10("4965661367192848881"), bigFromBase10("4965661367192848882"), bigFromBase10("-9931322734385697762")}, + {bigFromBase10("-9931322734385697762"), bigFromBase10("-4965661367192848881"), bigFromBase10("4965661367192848881"), bigFromBase10("-4965661367192848882")}, + {bigFromBase10("9931322734385697763"), bigFromBase10("-4965661367192848881"), bigFromBase10("-4965661367192848881"), bigFromBase10("-4965661367192848881")}, + }, + inverse: []*big.Int{ + bigFromBase10("734653495049373973658254490726798021314063399421879442165"), + bigFromBase10("147946756881789319000765030803803410728"), + bigFromBase10("-147946756881789319005730692170996259609"), + bigFromBase10("1469306990098747947464455738335385361643788813749140841702"), + }, + det: new(big.Int).Set(Order), +} + +type lattice struct { + vectors [][]*big.Int + inverse []*big.Int + det *big.Int +} + +// decompose takes a scalar mod Order as input and finds a short, positive decomposition of it wrt to the lattice basis. +func (l *lattice) decompose(k *big.Int) []*big.Int { + n := len(l.inverse) + + // Calculate closest vector in lattice to <k,0,0,...> with Babai's rounding. + c := make([]*big.Int, n) + for i := 0; i < n; i++ { + c[i] = new(big.Int).Mul(k, l.inverse[i]) + round(c[i], l.det) + } + + // Transform vectors according to c and subtract <k,0,0,...>. + out := make([]*big.Int, n) + temp := new(big.Int) + + for i := 0; i < n; i++ { + out[i] = new(big.Int) + + for j := 0; j < n; j++ { + temp.Mul(c[j], l.vectors[j][i]) + out[i].Add(out[i], temp) + } + + out[i].Neg(out[i]) + out[i].Add(out[i], l.vectors[0][i]).Add(out[i], l.vectors[0][i]) + } + out[0].Add(out[0], k) + + return out +} + +func (l *lattice) Precompute(add func(i, j uint)) { + n := uint(len(l.vectors)) + total := uint(1) << n + + for i := uint(0); i < n; i++ { + for j := uint(0); j < total; j++ { + if (j>>i)&1 == 1 { + add(i, j) + } + } + } +} + +func (l *lattice) Multi(scalar *big.Int) []uint8 { + decomp := l.decompose(scalar) + + maxLen := 0 + for _, x := range decomp { + if x.BitLen() > maxLen { + maxLen = x.BitLen() + } + } + + out := make([]uint8, maxLen) + for j, x := range decomp { + for i := 0; i < maxLen; i++ { + out[i] += uint8(x.Bit(i)) << uint(j) + } + } + + return out +} + +// round sets num to num/denom rounded to the nearest integer. +func round(num, denom *big.Int) { + r := new(big.Int) + num.DivMod(num, denom, r) + + if r.Cmp(half) == 1 { + num.Add(num, big.NewInt(1)) + } +} diff --git a/crypto/bn256/cloudflare/lattice_test.go b/crypto/bn256/cloudflare/lattice_test.go new file mode 100644 index 000000000..4d52ad9b2 --- /dev/null +++ b/crypto/bn256/cloudflare/lattice_test.go @@ -0,0 +1,29 @@ +package bn256 + +import ( + "crypto/rand" + + "testing" +) + +func TestLatticeReduceCurve(t *testing.T) { + k, _ := rand.Int(rand.Reader, Order) + ks := curveLattice.decompose(k) + + if ks[0].BitLen() > 130 || ks[1].BitLen() > 130 { + t.Fatal("reduction too large") + } else if ks[0].Sign() < 0 || ks[1].Sign() < 0 { + t.Fatal("reduction must be positive") + } +} + +func TestLatticeReduceTarget(t *testing.T) { + k, _ := rand.Int(rand.Reader, Order) + ks := targetLattice.decompose(k) + + if ks[0].BitLen() > 66 || ks[1].BitLen() > 66 || ks[2].BitLen() > 66 || ks[3].BitLen() > 66 { + t.Fatal("reduction too large") + } else if ks[0].Sign() < 0 || ks[1].Sign() < 0 || ks[2].Sign() < 0 || ks[3].Sign() < 0 { + t.Fatal("reduction must be positive") + } +} diff --git a/crypto/bn256/cloudflare/main_test.go b/crypto/bn256/cloudflare/main_test.go index f0d59a404..0230f1b19 100644 --- a/crypto/bn256/cloudflare/main_test.go +++ b/crypto/bn256/cloudflare/main_test.go @@ -1,5 +1,3 @@ -// +build amd64,!appengine,!gccgo - package bn256 import ( diff --git a/crypto/bn256/cloudflare/mul.h b/crypto/bn256/cloudflare/mul_amd64.h index bab5da831..bab5da831 100644 --- a/crypto/bn256/cloudflare/mul.h +++ b/crypto/bn256/cloudflare/mul_amd64.h diff --git a/crypto/bn256/cloudflare/mul_arm64.h b/crypto/bn256/cloudflare/mul_arm64.h new file mode 100644 index 000000000..75d522173 --- /dev/null +++ b/crypto/bn256/cloudflare/mul_arm64.h @@ -0,0 +1,133 @@ +#define mul(c0,c1,c2,c3,c4,c5,c6,c7) \ + MUL R1, R5, c0 \ + UMULH R1, R5, c1 \ + MUL R1, R6, R0 \ + ADDS R0, c1 \ + UMULH R1, R6, c2 \ + MUL R1, R7, R0 \ + ADCS R0, c2 \ + UMULH R1, R7, c3 \ + MUL R1, R8, R0 \ + ADCS R0, c3 \ + UMULH R1, R8, c4 \ + ADCS ZR, c4 \ + \ + MUL R2, R5, R25 \ + UMULH R2, R5, R26 \ + MUL R2, R6, R0 \ + ADDS R0, R26 \ + UMULH R2, R6, R27 \ + MUL R2, R7, R0 \ + ADCS R0, R27 \ + UMULH R2, R7, R29 \ + MUL R2, R8, R0 \ + ADCS R0, R29 \ + UMULH R2, R8, c5 \ + ADCS ZR, c5 \ + ADDS R25, c1 \ + ADCS R26, c2 \ + ADCS R27, c3 \ + ADCS R29, c4 \ + ADCS ZR, c5 \ + \ + MUL R3, R5, R25 \ + UMULH R3, R5, R26 \ + MUL R3, R6, R0 \ + ADDS R0, R26 \ + UMULH R3, R6, R27 \ + MUL R3, R7, R0 \ + ADCS R0, R27 \ + UMULH R3, R7, R29 \ + MUL R3, R8, R0 \ + ADCS R0, R29 \ + UMULH R3, R8, c6 \ + ADCS ZR, c6 \ + ADDS R25, c2 \ + ADCS R26, c3 \ + ADCS R27, c4 \ + ADCS R29, c5 \ + ADCS ZR, c6 \ + \ + MUL R4, R5, R25 \ + UMULH R4, R5, R26 \ + MUL R4, R6, R0 \ + ADDS R0, R26 \ + UMULH R4, R6, R27 \ + MUL R4, R7, R0 \ + ADCS R0, R27 \ + UMULH R4, R7, R29 \ + MUL R4, R8, R0 \ + ADCS R0, R29 \ + UMULH R4, R8, c7 \ + ADCS ZR, c7 \ + ADDS R25, c3 \ + ADCS R26, c4 \ + ADCS R27, c5 \ + ADCS R29, c6 \ + ADCS ZR, c7 + +#define gfpReduce() \ + \ // m = (T * N') mod R, store m in R1:R2:R3:R4 + MOVD ·np+0(SB), R17 \ + MOVD ·np+8(SB), R18 \ + MOVD ·np+16(SB), R19 \ + MOVD ·np+24(SB), R20 \ + \ + MUL R9, R17, R1 \ + UMULH R9, R17, R2 \ + MUL R9, R18, R0 \ + ADDS R0, R2 \ + UMULH R9, R18, R3 \ + MUL R9, R19, R0 \ + ADCS R0, R3 \ + UMULH R9, R19, R4 \ + MUL R9, R20, R0 \ + ADCS R0, R4 \ + \ + MUL R10, R17, R21 \ + UMULH R10, R17, R22 \ + MUL R10, R18, R0 \ + ADDS R0, R22 \ + UMULH R10, R18, R23 \ + MUL R10, R19, R0 \ + ADCS R0, R23 \ + ADDS R21, R2 \ + ADCS R22, R3 \ + ADCS R23, R4 \ + \ + MUL R11, R17, R21 \ + UMULH R11, R17, R22 \ + MUL R11, R18, R0 \ + ADDS R0, R22 \ + ADDS R21, R3 \ + ADCS R22, R4 \ + \ + MUL R12, R17, R21 \ + ADDS R21, R4 \ + \ + \ // m * N + loadModulus(R5,R6,R7,R8) \ + mul(R17,R18,R19,R20,R21,R22,R23,R24) \ + \ + \ // Add the 512-bit intermediate to m*N + MOVD ZR, R25 \ + ADDS R9, R17 \ + ADCS R10, R18 \ + ADCS R11, R19 \ + ADCS R12, R20 \ + ADCS R13, R21 \ + ADCS R14, R22 \ + ADCS R15, R23 \ + ADCS R16, R24 \ + ADCS ZR, R25 \ + \ + \ // Our output is R21:R22:R23:R24. Reduce mod p if necessary. + SUBS R5, R21, R10 \ + SBCS R6, R22, R11 \ + SBCS R7, R23, R12 \ + SBCS R8, R24, R13 \ + \ + CSEL CS, R10, R21, R1 \ + CSEL CS, R11, R22, R2 \ + CSEL CS, R12, R23, R3 \ + CSEL CS, R13, R24, R4 diff --git a/crypto/bn256/cloudflare/mul_bmi2.h b/crypto/bn256/cloudflare/mul_bmi2_amd64.h index 71ad0499a..71ad0499a 100644 --- a/crypto/bn256/cloudflare/mul_bmi2.h +++ b/crypto/bn256/cloudflare/mul_bmi2_amd64.h |