big: ~8x faster number scanning

- better number scanning algorithm
- fixed a couple of bugs related to base interpretation
- added scan benchmark
- added more test cases and made tests more precise
- introduced Int.scan method matching nat.scan
- refactored Int.Scan; now uses int.scan
- refactored Int.SetString; now uses int.scan

There is more potential, this was a fairly simple change.

gotest -test.bench="ScanPi" before/after (best of 3 runs):
big.BenchmarkScanPi	   1000	    2024900 ns/op
big.BenchmarkScanPi       10000      257540 ns/op

R=chickencha
CC=golang-dev, rsc
https://golang.org/cl/4527089
This commit is contained in:
Robert Griesemer 2011-06-01 14:17:00 -07:00
parent 16dbf2182c
commit ce2701b2b0
3 changed files with 246 additions and 104 deletions

View File

@ -8,6 +8,7 @@ package big
import (
"fmt"
"io"
"os"
"rand"
"strings"
@ -375,11 +376,48 @@ func (x *Int) Format(s fmt.State, ch int) {
}
// Scan is a support routine for fmt.Scanner. It accepts the formats
// 'b' (binary), 'o' (octal), 'd' (decimal), 'x' (lowercase hexadecimal),
// and 'X' (uppercase hexadecimal).
func (x *Int) Scan(s fmt.ScanState, ch int) os.Error {
var base int
// scan sets z to the integer value corresponding to the longest possible prefix
// read from r representing a signed integer number in a given conversion base.
// It returns z, the actual conversion base used, and an error, if any. In the
// error case, the value of z is undefined. The syntax follows the syntax of
// integer literals in Go.
//
// The base argument must be 0 or a value from 2 through MaxBase. If the base
// is 0, the string prefix determines the actual conversion base. A prefix of
// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
//
func (z *Int) scan(r io.RuneScanner, base int) (*Int, int, os.Error) {
// determine sign
ch, _, err := r.ReadRune()
if err != nil {
return z, 0, err
}
neg := false
switch ch {
case '-':
neg = true
case '+': // nothing to do
default:
r.UnreadRune()
}
// determine mantissa
z.abs, base, err = z.abs.scan(r, base)
if err != nil {
return z, base, err
}
z.neg = len(z.abs) > 0 && neg // 0 has no sign
return z, base, nil
}
// Scan is a support routine for fmt.Scanner; it sets z to the value of
// the scanned number. It accepts the formats 'b' (binary), 'o' (octal),
// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal).
func (z *Int) Scan(s fmt.ScanState, ch int) os.Error {
base := 0
switch ch {
case 'b':
base = 2
@ -394,32 +432,13 @@ func (x *Int) Scan(s fmt.ScanState, ch int) os.Error {
default:
return os.ErrorString("Int.Scan: invalid verb")
}
ch, _, err := s.ReadRune()
if err != nil {
return err
}
neg := false
switch ch {
case '-':
neg = true
case '+': // nothing to do
default:
s.UnreadRune()
}
x.abs, _, err = x.abs.scan(s, base)
if err != nil {
return err
}
x.neg = len(x.abs) > 0 && neg // 0 has no sign
return nil
_, _, err := z.scan(s, base)
return err
}
// Int64 returns the int64 representation of z.
// If z cannot be represented in an int64, the result is undefined.
// Int64 returns the int64 representation of x.
// If x cannot be represented in an int64, the result is undefined.
func (x *Int) Int64() int64 {
if len(x.abs) == 0 {
return 0
@ -439,33 +458,19 @@ func (x *Int) Int64() int64 {
// and returns z and a boolean indicating success. If SetString fails,
// the value of z is undefined.
//
// If the base argument is 0, the string prefix determines the actual
// conversion base. A prefix of ``0x'' or ``0X'' selects base 16; the
// ``0'' prefix selects base 8, and a ``0b'' or ``0B'' prefix selects
// base 2. Otherwise the selected base is 10.
// The base argument must be 0 or a value from 2 through MaxBase. If the base
// is 0, the string prefix determines the actual conversion base. A prefix of
// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
//
func (z *Int) SetString(s string, base int) (*Int, bool) {
neg := false
if len(s) > 0 {
switch s[0] {
case '-':
neg = true
fallthrough
case '+':
s = s[1:]
}
}
r := strings.NewReader(s)
abs, _, err := z.abs.scan(r, base)
_, _, err := z.scan(r, base)
if err != nil {
return z, false
}
_, _, err = r.ReadRune()
z.abs = abs
z.neg = len(abs) > 0 && neg // 0 has no sign
return z, err == os.EOF // err == os.EOF => scan consumed all of s
return z, err == os.EOF // err == os.EOF => scan consumed all of s
}

View File

@ -608,8 +608,11 @@ func (x nat) bitLen() int {
}
func hexValue(ch int) int {
var d int
// MaxBase is the largest number base accepted for string conversions.
const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
func hexValue(ch int) Word {
d := MaxBase + 1 // illegal base
switch {
case '0' <= ch && ch <= '9':
d = ch - '0'
@ -617,86 +620,106 @@ func hexValue(ch int) int {
d = ch - 'a' + 10
case 'A' <= ch && ch <= 'Z':
d = ch - 'A' + 10
default:
return -1
}
return d
return Word(d)
}
// scan returns the natural number corresponding to the longest
// possible prefix read from r representing a natural number in a
// given conversion base, the actual conversion base used, and an
// error, if any. The syntax of natural numbers follows the syntax of
// scan sets z to the natural number corresponding to the longest possible prefix
// read from r representing an unsigned integer in a given conversion base.
// It returns z, the actual conversion base used, and an error, if any. In the
// error case, the value of z is undefined. The syntax follows the syntax of
// unsigned integer literals in Go.
//
// If the base argument is 0, the string prefix determines the actual
// conversion base. A prefix of ``0x'' or ``0X'' selects base 16; the
// ``0'' prefix selects base 8, and a ``0b'' or ``0B'' prefix selects
// base 2. Otherwise the selected base is 10.
// The base argument must be 0 or a value from 2 through MaxBase. If the base
// is 0, the string prefix determines the actual conversion base. A prefix of
// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
//
func (z nat) scan(r io.RuneScanner, base int) (nat, int, os.Error) {
n := 0
// reject illegal bases
if base < 0 || base == 1 || MaxBase < base {
return z, 0, os.ErrorString("illegal number base")
}
// one char look-ahead
ch, _, err := r.ReadRune()
if err != nil {
return z, 0, err
}
// determine base if necessary
b := Word(base)
if base == 0 {
base = 10
b = 10
if ch == '0' {
n++
switch ch, _, err = r.ReadRune(); err {
case nil:
base = 8
b = 8
switch ch {
case 'x', 'X':
base = 16
b = 16
case 'b', 'B':
base = 2
b = 2
}
if base == 2 || base == 16 {
n--
if b == 2 || b == 16 {
if ch, _, err = r.ReadRune(); err != nil {
return z, 0, os.ErrorString("syntax error scanning binary or hexadecimal number")
return z, 0, err
}
}
case os.EOF:
return z, 10, nil
default:
return z, 0, err
return z, 10, err
}
}
}
// reject illegal bases
if base < 2 || 'z'-'a'+10 < base {
return z, 0, os.ErrorString("illegal number base")
}
// convert string
// - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
// - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
z = z.make(0)
bb := Word(1)
dd := Word(0)
for {
d := hexValue(ch)
if 0 <= d && d < base {
z = z.mulAddWW(z, Word(base), Word(d))
} else {
r.UnreadRune()
if n > 0 {
break
}
return z, 0, os.ErrorString("syntax error scanning number")
if d >= b {
r.UnreadRune() // ch does not belong to number anymore
break
}
n++
if tmp := bb * b; tmp < bb {
// overflow
z = z.mulAddWW(z, bb, dd)
bb = b
dd = d
} else {
bb = tmp
dd = dd*b + d
}
if ch, _, err = r.ReadRune(); err != nil {
if err == os.EOF {
break
if err != os.EOF {
return z, int(b), err
}
return z, 0, err
break
}
}
return z.norm(), base, nil
switch {
case bb > 1:
// there was at least one mantissa digit
z = z.mulAddWW(z, bb, dd)
case base == 0 && b == 8:
// there was only the octal prefix 0 (possibly followed by digits > 7);
// return base 10, not 8
return z, 10, nil
case base != 0 || b != 8:
// there was neither a mantissa digit nor the octal prefix 0
return z, int(b), os.ErrorString("syntax error scanning number")
}
return z.norm(), int(b), nil
}

View File

@ -5,6 +5,7 @@
package big
import (
"os"
"strings"
"testing"
)
@ -211,28 +212,58 @@ func TestString(t *testing.T) {
var natScanTests = []struct {
s string // string to be scanned
base int // input base
x nat // expected nat
base int // expected base
b int // expected base
ok bool // expected success
next int // next character (or 0, if at EOF)
}{
{s: ""},
{"0", nil, 10, true},
{"0 ", nil, 8, true},
// error: illegal base
{base: -1},
{base: 1},
{base: 37},
// error: no mantissa
{},
{s: "?"},
{base: 10},
{base: 36},
{s: "?", base: 10},
{s: "0x"},
{"08", nil, 8, true},
{"0b1", nat{1}, 2, true},
{"0b11000101", nat{0xc5}, 2, true},
{"03271", nat{03271}, 8, true},
{"10ab", nat{10}, 10, true},
{"1234567890", nat{1234567890}, 10, true},
{"0xdeadbeef", nat{0xdeadbeef}, 16, true},
{"0XDEADBEEF", nat{0xdeadbeef}, 16, true},
{s: "345", base: 2},
// no errors
{"0", 0, nil, 10, true, 0},
{"0", 10, nil, 10, true, 0},
{"0", 36, nil, 36, true, 0},
{"1", 0, nat{1}, 10, true, 0},
{"1", 10, nat{1}, 10, true, 0},
{"0 ", 0, nil, 10, true, ' '},
{"08", 0, nil, 10, true, '8'},
{"018", 0, nat{1}, 8, true, '8'},
{"0b1", 0, nat{1}, 2, true, 0},
{"0b11000101", 0, nat{0xc5}, 2, true, 0},
{"03271", 0, nat{03271}, 8, true, 0},
{"10ab", 0, nat{10}, 10, true, 'a'},
{"1234567890", 0, nat{1234567890}, 10, true, 0},
{"xyz", 36, nat{(33*36+34)*36 + 35}, 36, true, 0},
{"xyz?", 36, nat{(33*36+34)*36 + 35}, 36, true, '?'},
{"0x", 16, nil, 16, true, 'x'},
{"0xdeadbeef", 0, nat{0xdeadbeef}, 16, true, 0},
{"0XDEADBEEF", 0, nat{0xdeadbeef}, 16, true, 0},
{"0xfedcba9876543213fedcba9876543212fedcba9876543211fedcba9876543210",
0,
nat{0xfedcba9876543210, 0xfedcba9876543211, 0xfedcba9876543212, 0xfedcba9876543213},
16,
true,
0},
}
func TestScanBase0(t *testing.T) {
func TestScanBase(t *testing.T) {
for _, a := range natScanTests {
x, b, err := nat(nil).scan(strings.NewReader(a.s), 0)
r := strings.NewReader(a.s)
x, b, err := nat(nil).scan(r, a.base)
if err == nil && !a.ok {
t.Errorf("scan%+v\n\texpected error", a)
}
@ -245,9 +276,92 @@ func TestScanBase0(t *testing.T) {
if x.cmp(a.x) != 0 {
t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x)
}
if b != a.base {
if b != a.b {
t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.base)
}
next, _, err := r.ReadRune()
if err == os.EOF {
next = 0
err = nil
}
if err == nil && next != a.next {
t.Errorf("scan%+v\n\tgot next = %q; want %q", a, next, a.next)
}
}
}
var pi = "3" +
"14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651" +
"32823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461" +
"28475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920" +
"96282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179" +
"31051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798" +
"60943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901" +
"22495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837" +
"29780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083" +
"81420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909" +
"21642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151" +
"55748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035" +
"63707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104" +
"75216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992" +
"45863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818" +
"34797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548" +
"16136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179" +
"04946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886" +
"26945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645" +
"99581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745" +
"53050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382" +
"68683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244" +
"13654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767" +
"88952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288" +
"79710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821" +
"68299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610" +
"21359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435" +
"06430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675" +
"14269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672" +
"21825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539" +
"05796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007" +
"23055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816" +
"90915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398" +
"31501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064" +
"20467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325" +
"97463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100" +
"44929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915" +
"44110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201" +
"85581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318" +
"58676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797" +
"27082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923" +
"09907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111" +
"79042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043" +
"06332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120" +
"91807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862" +
"94726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939" +
"78054193414473774418426312986080998886874132604721569516239658645730216315981931951673538129741677294786724229" +
"24654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001" +
"59377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541" +
"34189948544473456738316249934191318148092777710386387734317720754565453220777092120190516609628049092636019759" +
"88281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267" +
"94561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337"
// Test case for BenchmarkScanPi.
func TestScanPi(t *testing.T) {
var x nat
z, _, err := x.scan(strings.NewReader(pi), 10)
if err != nil {
t.Errorf("scanning pi: %s", err)
}
if s := z.decimalString(); s != pi {
t.Errorf("scanning pi: got %s", s)
}
}
func BenchmarkScanPi(b *testing.B) {
for i := 0; i < b.N; i++ {
var x nat
x.scan(strings.NewReader(pi), 10)
}
}