From 018c60bd8f2447ee1426568707d7179623dac552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Oudompheng?= Date: Fri, 4 May 2012 19:05:26 +0200 Subject: [PATCH] math/big: fix superpolynomial complexity in Karatsuba algorithm. benchmark old ns/op new ns/op delta BenchmarkExp3Power0x10 732 734 +0.27% BenchmarkExp3Power0x40 834 836 +0.24% BenchmarkExp3Power0x100 1600 1579 -1.31% BenchmarkExp3Power0x400 3478 3417 -1.75% BenchmarkExp3Power0x1000 19388 19229 -0.82% BenchmarkExp3Power0x4000 160274 156881 -2.12% BenchmarkExp3Power0x10000 1552050 1372058 -11.60% BenchmarkExp3Power0x40000 27328710 15216920 -44.32% BenchmarkExp3Power0x100000 612349000 131407100 -78.54% BenchmarkExp3Power0x400000 44073524000 1122195000 -97.45% R=golang-dev, mtj, gri, rsc CC=golang-dev, remy https://golang.org/cl/6176043 --- src/pkg/math/big/nat.go | 10 +++++----- src/pkg/math/big/nat_test.go | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/pkg/math/big/nat.go b/src/pkg/math/big/nat.go index 0bc6572b96..eaa6ff0666 100644 --- a/src/pkg/math/big/nat.go +++ b/src/pkg/math/big/nat.go @@ -271,10 +271,10 @@ func karatsuba(z, x, y nat) { // xd = x1 - x0 // yd = y0 - y1 // - // z1 = xd*yd + z1 + z0 - // = (x1-x0)*(y0 - y1) + z1 + z0 - // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0 - // = x1*y0 - z1 - z0 + x0*y1 + z1 + z0 + // z1 = xd*yd + z2 + z0 + // = (x1-x0)*(y0 - y1) + z2 + z0 + // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 + // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 // = x1*y0 + x0*y1 // split x, y into "digits" @@ -318,7 +318,7 @@ func karatsuba(z, x, y nat) { // save original z2:z0 // (ok to use upper half of z since we're done recursing) r := z[n*4:] - copy(r, z) + copy(r, z[:n*2]) // add up all partial products // diff --git a/src/pkg/math/big/nat_test.go b/src/pkg/math/big/nat_test.go index 7f3f76dc36..becde5d171 100644 --- a/src/pkg/math/big/nat_test.go +++ b/src/pkg/math/big/nat_test.go @@ -661,3 +661,21 @@ func TestExpNN(t *testing.T) { } } } + +func ExpHelper(b *testing.B, x, y Word) { + var z nat + for i := 0; i < b.N; i++ { + z.expWW(x, y) + } +} + +func BenchmarkExp3Power0x10(b *testing.B) { ExpHelper(b, 3, 0x10) } +func BenchmarkExp3Power0x40(b *testing.B) { ExpHelper(b, 3, 0x40) } +func BenchmarkExp3Power0x100(b *testing.B) { ExpHelper(b, 3, 0x100) } +func BenchmarkExp3Power0x400(b *testing.B) { ExpHelper(b, 3, 0x400) } +func BenchmarkExp3Power0x1000(b *testing.B) { ExpHelper(b, 3, 0x1000) } +func BenchmarkExp3Power0x4000(b *testing.B) { ExpHelper(b, 3, 0x4000) } +func BenchmarkExp3Power0x10000(b *testing.B) { ExpHelper(b, 3, 0x10000) } +func BenchmarkExp3Power0x40000(b *testing.B) { ExpHelper(b, 3, 0x40000) } +func BenchmarkExp3Power0x100000(b *testing.B) { ExpHelper(b, 3, 0x100000) } +func BenchmarkExp3Power0x400000(b *testing.B) { ExpHelper(b, 3, 0x400000) }