cmd/compile: generalize strength reduction of mulq

* This is an improved version of an earlier patch.
* Verified with gcc up to 100.
* Limited to two instructions based on costs from
https://gmplib.org/~tege/x86-timing.pdf

Change-Id: Ib7c37de6fd8e0ba554459b15c7409508cbcf6728
Reviewed-on: https://go-review.googlesource.com/21103
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Alexandru Moșoi <alexandru@mosoi.ro>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Alexandru Moșoi 2016-03-24 22:46:37 +01:00 committed by Alexandru Moșoi
parent 1624a9c9e7
commit d8f1f8d856
3 changed files with 319 additions and 6 deletions

View File

@ -583,14 +583,35 @@
(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
// strength reduction
// Assumes the following costs from https://gmplib.org/~tege/x86-timing.pdf:
// 1 - addq, shlq, leaq, negq
// 3 - imulq
// This limits the rewrites to two instructions.
// TODO: 27, 81
(MULQconst [-1] x) -> (NEGQ x)
(MULQconst [0] _) -> (MOVQconst [0])
(MULQconst [1] x) -> x
(MULQconst [3] x) -> (LEAQ2 x x)
(MULQconst [5] x) -> (LEAQ4 x x)
(MULQconst [7] x) -> (LEAQ8 (NEGQ <v.Type> x) x)
(MULQconst [9] x) -> (LEAQ8 x x)
(MULQconst [24] x) -> (SHLQconst [3] (LEAQ2 <v.Type> x x)) // Useful for [][]T accesses
(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
(MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
// combine add/shift into LEAQ
(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)

View File

@ -9392,6 +9392,21 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
v.AddArg(x)
return true
}
// match: (MULQconst [7] x)
// cond:
// result: (LEAQ8 (NEGQ <v.Type> x) x)
for {
if v.AuxInt != 7 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ8)
v0 := b.NewValue0(v.Line, OpAMD64NEGQ, v.Type)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [9] x)
// cond:
// result: (LEAQ8 x x)
@ -9405,22 +9420,118 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
v.AddArg(x)
return true
}
// match: (MULQconst [24] x)
// match: (MULQconst [11] x)
// cond:
// result: (SHLQconst [3] (LEAQ2 <v.Type> x x))
// result: (LEAQ2 x (LEAQ4 <v.Type> x x))
for {
if v.AuxInt != 24 {
if v.AuxInt != 11 {
break
}
x := v.Args[0]
v.reset(OpAMD64SHLQconst)
v.AuxInt = 3
v.reset(OpAMD64LEAQ2)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [13] x)
// cond:
// result: (LEAQ4 x (LEAQ2 <v.Type> x x))
for {
if v.AuxInt != 13 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ4)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [21] x)
// cond:
// result: (LEAQ4 x (LEAQ4 <v.Type> x x))
for {
if v.AuxInt != 21 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ4)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [25] x)
// cond:
// result: (LEAQ8 x (LEAQ2 <v.Type> x x))
for {
if v.AuxInt != 25 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ8)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [37] x)
// cond:
// result: (LEAQ4 x (LEAQ8 <v.Type> x x))
for {
if v.AuxInt != 37 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ4)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [41] x)
// cond:
// result: (LEAQ8 x (LEAQ4 <v.Type> x x))
for {
if v.AuxInt != 41 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ8)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [73] x)
// cond:
// result: (LEAQ8 x (LEAQ8 <v.Type> x x))
for {
if v.AuxInt != 73 {
break
}
x := v.Args[0]
v.reset(OpAMD64LEAQ8)
v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c)
// result: (SHLQconst [log2(c)] x)
@ -9435,6 +9546,142 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c+1) && c >= 15
// result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
for {
c := v.AuxInt
x := v.Args[0]
if !(isPowerOfTwo(c+1) && c >= 15) {
break
}
v.reset(OpAMD64SUBQ)
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
v0.AuxInt = log2(c + 1)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c-1) && c >= 17
// result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
for {
c := v.AuxInt
x := v.Args[0]
if !(isPowerOfTwo(c-1) && c >= 17) {
break
}
v.reset(OpAMD64LEAQ1)
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
v0.AuxInt = log2(c - 1)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c-2) && c >= 34
// result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
for {
c := v.AuxInt
x := v.Args[0]
if !(isPowerOfTwo(c-2) && c >= 34) {
break
}
v.reset(OpAMD64LEAQ2)
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
v0.AuxInt = log2(c - 2)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c-4) && c >= 68
// result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
for {
c := v.AuxInt
x := v.Args[0]
if !(isPowerOfTwo(c-4) && c >= 68) {
break
}
v.reset(OpAMD64LEAQ4)
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
v0.AuxInt = log2(c - 4)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c-8) && c >= 136
// result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
for {
c := v.AuxInt
x := v.Args[0]
if !(isPowerOfTwo(c-8) && c >= 136) {
break
}
v.reset(OpAMD64LEAQ8)
v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
v0.AuxInt = log2(c - 8)
v0.AddArg(x)
v.AddArg(v0)
v.AddArg(x)
return true
}
// match: (MULQconst [c] x)
// cond: c%3 == 0 && isPowerOfTwo(c/3)
// result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
for {
c := v.AuxInt
x := v.Args[0]
if !(c%3 == 0 && isPowerOfTwo(c/3)) {
break
}
v.reset(OpAMD64SHLQconst)
v.AuxInt = log2(c / 3)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [c] x)
// cond: c%5 == 0 && isPowerOfTwo(c/5)
// result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
for {
c := v.AuxInt
x := v.Args[0]
if !(c%5 == 0 && isPowerOfTwo(c/5)) {
break
}
v.reset(OpAMD64SHLQconst)
v.AuxInt = log2(c / 5)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [c] x)
// cond: c%9 == 0 && isPowerOfTwo(c/9)
// result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
for {
c := v.AuxInt
x := v.Args[0]
if !(c%9 == 0 && isPowerOfTwo(c/9)) {
break
}
v.reset(OpAMD64SHLQconst)
v.AuxInt = log2(c / 9)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (MULQconst [c] (MOVQconst [d]))
// cond:
// result: (MOVQconst [c*d])

45
test/strength.go Normal file
View File

@ -0,0 +1,45 @@
// runoutput
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Generate test of strength reduction for multiplications
// with constants. Especially useful for amd64/386.
package main
import "fmt"
// testMul prints, on standard output, the source of a generated checker
// function named testMul_<fact>_<bits> that takes s of type int<bits>.
// The generated function contains 200 checks, one per multiplier i in
// [0, 200), each comparing s*i against the constant int<bits>(i*fact);
// on a mismatch it sets the generated program's failed flag and prints
// a diagnostic.
//
// The return value is the text of a call to the generated function with
// fact as its argument, e.g. "testMul_17_32(17)".
func testMul(fact, bits int) string {
	// Template for a single check. Arguments, in order: bits, expected
	// product, multiplier, multiplier, expected product. The %%d verbs
	// are escaped so they survive into the generated Printf call.
	const check = ` if want, got := int%d(%d), s*%d; want != got {
failed = true
fmt.Printf("got %d * %%d == %%d, wanted %d\n", s, got)
}
`

	name := fmt.Sprintf("testMul_%d_%d", fact, bits)
	fmt.Printf("func %s(s int%d) {\n", name, bits)
	for mult := 0; mult < 200; mult++ {
		product := mult * fact
		fmt.Printf(check, bits, product, mult, mult, product)
	}
	fmt.Printf("}\n")
	return fmt.Sprintf("%s(%d)", name, fact)
}
// main writes a complete Go test program to standard output: a package
// header, the "failed" flag, the checker functions produced by testMul,
// and a main function that invokes each checker and panics if any check
// set the flag.
func main() {
	fmt.Print("package main\n")
	fmt.Print("import \"fmt\"\n")
	fmt.Print("var failed = false\n")

	// Each testMul call prints its checker's definition as a side effect
	// and yields the call expression to place in the generated main, so
	// the definitions must be emitted before the generated main is opened.
	calls := []string{
		testMul(17, 32),
		testMul(131, 64),
	}

	fmt.Print("func main() {\n")
	for _, call := range calls {
		fmt.Println(call)
	}
	fmt.Print("if failed {\n panic(\"multiplication failed\")\n}\n")
	fmt.Print("}\n")
}