mirror of
https://github.com/golang/go.git
synced 2024-09-21 10:28:27 +00:00
cmd/compile: generalize strength reduction of mulq
* This is an improved version of an earlier patch. * Verified with gcc up to 100. * Limited to two instructions based on costs from https://gmplib.org/~tege/x86-timing.pdf Change-Id: Ib7c37de6fd8e0ba554459b15c7409508cbcf6728 Reviewed-on: https://go-review.googlesource.com/21103 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Alexandru Moșoi <alexandru@mosoi.ro> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
1624a9c9e7
commit
d8f1f8d856
@ -583,14 +583,35 @@
|
||||
(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
|
||||
|
||||
// strength reduction
|
||||
// Assumes the following instruction costs, taken from https://gmplib.org/~tege/x86-timing.pdf:
|
||||
// 1 - addq, shlq, leaq, negq
|
||||
// 3 - imulq
|
||||
// This limits the rewrites to two instructions.
|
||||
// TODO: 27, 81
|
||||
(MULQconst [-1] x) -> (NEGQ x)
|
||||
(MULQconst [0] _) -> (MOVQconst [0])
|
||||
(MULQconst [1] x) -> x
|
||||
(MULQconst [3] x) -> (LEAQ2 x x)
|
||||
(MULQconst [5] x) -> (LEAQ4 x x)
|
||||
(MULQconst [7] x) -> (LEAQ8 (NEGQ <v.Type> x) x)
|
||||
(MULQconst [9] x) -> (LEAQ8 x x)
|
||||
(MULQconst [24] x) -> (SHLQconst [3] (LEAQ2 <v.Type> x x)) // Useful for [][]T accesses
|
||||
(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
|
||||
(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
|
||||
(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
|
||||
(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
|
||||
(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
|
||||
(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
|
||||
(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
|
||||
|
||||
(MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
|
||||
(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
|
||||
(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
|
||||
(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
|
||||
(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
|
||||
(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
|
||||
(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
|
||||
(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
|
||||
(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
|
||||
|
||||
// combine add/shift into LEAQ
|
||||
(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
|
||||
|
@ -9392,6 +9392,21 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [7] x)
|
||||
// cond:
|
||||
// result: (LEAQ8 (NEGQ <v.Type> x) x)
|
||||
for {
|
||||
if v.AuxInt != 7 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64NEGQ, v.Type)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [9] x)
|
||||
// cond:
|
||||
// result: (LEAQ8 x x)
|
||||
@ -9405,22 +9420,118 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [24] x)
|
||||
// match: (MULQconst [11] x)
|
||||
// cond:
|
||||
// result: (SHLQconst [3] (LEAQ2 <v.Type> x x))
|
||||
// result: (LEAQ2 x (LEAQ4 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 24 {
|
||||
if v.AuxInt != 11 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = 3
|
||||
v.reset(OpAMD64LEAQ2)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [13] x)
|
||||
// cond:
|
||||
// result: (LEAQ4 x (LEAQ2 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 13 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ4)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [21] x)
|
||||
// cond:
|
||||
// result: (LEAQ4 x (LEAQ4 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 21 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ4)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [25] x)
|
||||
// cond:
|
||||
// result: (LEAQ8 x (LEAQ2 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 25 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [37] x)
|
||||
// cond:
|
||||
// result: (LEAQ4 x (LEAQ8 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 37 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ4)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [41] x)
|
||||
// cond:
|
||||
// result: (LEAQ8 x (LEAQ4 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 41 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [73] x)
|
||||
// cond:
|
||||
// result: (LEAQ8 x (LEAQ8 <v.Type> x x))
|
||||
for {
|
||||
if v.AuxInt != 73 {
|
||||
break
|
||||
}
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v.AddArg(x)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c)
|
||||
// result: (SHLQconst [log2(c)] x)
|
||||
@ -9435,6 +9546,142 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c+1) && c >= 15
|
||||
// result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(isPowerOfTwo(c+1) && c >= 15) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SUBQ)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
|
||||
v0.AuxInt = log2(c + 1)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c-1) && c >= 17
|
||||
// result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(isPowerOfTwo(c-1) && c >= 17) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64LEAQ1)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
|
||||
v0.AuxInt = log2(c - 1)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c-2) && c >= 34
|
||||
// result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(isPowerOfTwo(c-2) && c >= 34) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64LEAQ2)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
|
||||
v0.AuxInt = log2(c - 2)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c-4) && c >= 68
|
||||
// result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(isPowerOfTwo(c-4) && c >= 68) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64LEAQ4)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
|
||||
v0.AuxInt = log2(c - 4)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: isPowerOfTwo(c-8) && c >= 136
|
||||
// result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(isPowerOfTwo(c-8) && c >= 136) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64LEAQ8)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
|
||||
v0.AuxInt = log2(c - 8)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: c%3 == 0 && isPowerOfTwo(c/3)
|
||||
// result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(c%3 == 0 && isPowerOfTwo(c/3)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = log2(c / 3)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: c%5 == 0 && isPowerOfTwo(c/5)
|
||||
// result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(c%5 == 0 && isPowerOfTwo(c/5)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = log2(c / 5)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] x)
|
||||
// cond: c%9 == 0 && isPowerOfTwo(c/9)
|
||||
// result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
|
||||
for {
|
||||
c := v.AuxInt
|
||||
x := v.Args[0]
|
||||
if !(c%9 == 0 && isPowerOfTwo(c/9)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64SHLQconst)
|
||||
v.AuxInt = log2(c / 9)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (MULQconst [c] (MOVQconst [d]))
|
||||
// cond:
|
||||
// result: (MOVQconst [c*d])
|
||||
|
45
test/strength.go
Normal file
45
test/strength.go
Normal file
@ -0,0 +1,45 @@
|
||||
// runoutput
|
||||
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Generate test of strength reduction for multiplications
|
||||
// with constants. Especially useful for amd64/386.
|
||||
|
||||
package main
|
||||
|
||||
import "fmt"
|
||||
|
||||
// testMul prints to standard output the source of a Go function named
// testMul_<fact>_<bits> and returns the text of a statement that calls it.
//
// The emitted function takes a parameter s of type int<bits> and, for every
// literal i in [0, 200), compares s*i against the precomputed value fact*i.
// On a mismatch the emitted code assigns true to failed (a variable the
// surrounding generated program must declare) and prints the offending
// product. The returned call passes fact as s, which is the value the
// precomputed products assume.
func testMul(fact, bits int) string {
	n := fmt.Sprintf("testMul_%d_%d", fact, bits)
	fmt.Printf("func %s(s int%d) {\n", n, bits)

	// want holds fact*i; it is advanced by addition so the expected values
	// are produced without performing a multiplication in the generator.
	want := 0
	for i := 0; i < 200; i++ {
		// %%d survives as %d in the emitted Printf call, where it is filled
		// in at run time with s and got; the plain %d verbs are filled in
		// here with i and want.
		fmt.Printf(` if want, got := int%d(%d), s*%d; want != got {
failed = true
fmt.Printf("got %d * %%d == %%d, wanted %d\n", s, got)
}
`, bits, want, i, i, want)
		want += fact
	}

	fmt.Printf("}\n")
	return fmt.Sprintf("%s(%d)", n, fact)
}
|
||||
|
||||
func main() {
|
||||
fmt.Printf("package main\n")
|
||||
fmt.Printf("import \"fmt\"\n")
|
||||
fmt.Printf("var failed = false\n")
|
||||
|
||||
f1 := testMul(17, 32)
|
||||
f2 := testMul(131, 64)
|
||||
|
||||
fmt.Printf("func main() {\n")
|
||||
fmt.Println(f1)
|
||||
fmt.Println(f2)
|
||||
fmt.Printf("if failed {\n panic(\"multiplication failed\")\n}\n")
|
||||
fmt.Printf("}\n")
|
||||
}
|
Loading…
Reference in New Issue
Block a user