mirror of
https://github.com/golang/go.git
synced 2024-09-21 18:38:37 +00:00
cmd/compile: optimize math/bits.Div32 for arm64
Benchmark: name old time/op new time/op delta Div-8 22.0ns ± 0% 22.0ns ± 0% ~ (all equal) Div32-8 6.51ns ± 0% 3.00ns ± 0% -53.90% (p=0.000 n=10+8) Div64-8 22.5ns ± 0% 22.5ns ± 0% ~ (all equal) Code: func div32(hi, lo, y uint32) (q, r uint32) {return bits.Div32(hi, lo, y)} Before: 0x0020 00032 (test.go:24) MOVWU "".y+8(FP), R0 0x0024 00036 ($GOROOT/src/math/bits/bits.go:472) CBZW R0, 132 0x0028 00040 ($GOROOT/src/math/bits/bits.go:472) MOVWU "".hi(FP), R1 0x002c 00044 ($GOROOT/src/math/bits/bits.go:472) CMPW R1, R0 0x0030 00048 ($GOROOT/src/math/bits/bits.go:472) BLS 96 0x0034 00052 ($GOROOT/src/math/bits/bits.go:475) MOVWU "".lo+4(FP), R2 0x0038 00056 ($GOROOT/src/math/bits/bits.go:475) ORR R1<<32, R2, R1 0x003c 00060 ($GOROOT/src/math/bits/bits.go:476) CBZ R0, 140 0x0040 00064 ($GOROOT/src/math/bits/bits.go:476) UDIV R0, R1, R2 0x0044 00068 (test.go:24) MOVW R2, "".q+16(FP) 0x0048 00072 ($GOROOT/src/math/bits/bits.go:476) UREM R0, R1, R0 0x0050 00080 (test.go:24) MOVW R0, "".r+20(FP) 0x0054 00084 (test.go:24) MOVD -8(RSP), R29 0x0058 00088 (test.go:24) MOVD.P 32(RSP), R30 0x005c 00092 (test.go:24) RET (R30) After: 0x001c 00028 (test.go:24) MOVWU "".y+8(FP), R0 0x0020 00032 (test.go:24) CBZW R0, 92 0x0024 00036 (test.go:24) MOVWU "".hi(FP), R1 0x0028 00040 (test.go:24) CMPW R0, R1 0x002c 00044 (test.go:24) BHS 84 0x0030 00048 (test.go:24) MOVWU "".lo+4(FP), R2 0x0034 00052 (test.go:24) ORR R1<<32, R2, R4 0x0038 00056 (test.go:24) UDIV R0, R4, R3 0x003c 00060 (test.go:24) MSUB R3, R4, R0, R4 0x0040 00064 (test.go:24) MOVW R3, "".q+16(FP) 0x0044 00068 (test.go:24) MOVW R4, "".r+20(FP) 0x0048 00072 (test.go:24) MOVD -8(RSP), R29 0x004c 00076 (test.go:24) MOVD.P 16(RSP), R30 0x0050 00080 (test.go:24) RET (R30) UREM instruction in the previous assembly code will be converted to UDIV and MSUB instructions on arm64. However the UDIV instruction in UREM is unnecessary, because it's a duplicate of the previous UDIV. This CL adds a rule to have this extra UDIV instruction removed by CSE. Change-Id: Ie2508784320020b2de022806d09f75a7871bb3d7 Reviewed-on: https://go-review.googlesource.com/c/159577 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Bryan C. Mills <bcmills@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
83610c90bb
commit
159b2de442
@ -133,6 +133,13 @@
|
||||
(BitRev16 x) -> (SRLconst [48] (RBIT <typ.UInt64> x))
|
||||
(BitRev8 x) -> (SRLconst [56] (RBIT <typ.UInt64> x))
|
||||
|
||||
// In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into
|
||||
// UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or
|
||||
// after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant.
|
||||
// The purpose of this rule is to have this extra UDIV instruction removed in CSE pass.
|
||||
(UMOD <typ.UInt64> x y) -> (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
|
||||
(UMODW <typ.UInt32> x y) -> (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
|
||||
|
||||
// boolean ops -- booleans are represented with 0=false, 1=true
|
||||
(AndB x y) -> (AND x y)
|
||||
(OrB x y) -> (OR x y)
|
||||
|
@ -30667,6 +30667,30 @@ func rewriteValueARM64_OpARM64UDIVW_0(v *Value) bool {
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (UMOD <typ.UInt64> x y)
|
||||
// cond:
|
||||
// result: (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
|
||||
for {
|
||||
if v.Type != typ.UInt64 {
|
||||
break
|
||||
}
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
y := v.Args[1]
|
||||
v.reset(OpARM64MSUB)
|
||||
v.Type = typ.UInt64
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v0 := b.NewValue0(v.Pos, OpARM64UDIV, typ.UInt64)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(y)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (UMOD _ (MOVDconst [1]))
|
||||
// cond:
|
||||
// result: (MOVDconst [0])
|
||||
@ -30724,6 +30748,30 @@ func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool {
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64UMODW_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (UMODW <typ.UInt32> x y)
|
||||
// cond:
|
||||
// result: (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
|
||||
for {
|
||||
if v.Type != typ.UInt32 {
|
||||
break
|
||||
}
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
y := v.Args[1]
|
||||
v.reset(OpARM64MSUBW)
|
||||
v.Type = typ.UInt32
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v0 := b.NewValue0(v.Pos, OpARM64UDIVW, typ.UInt32)
|
||||
v0.AddArg(x)
|
||||
v0.AddArg(y)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
// match: (UMODW _ (MOVDconst [c]))
|
||||
// cond: uint32(c)==1
|
||||
// result: (MOVDconst [0])
|
||||
|
@ -476,6 +476,11 @@ func Div(hi, lo, x uint) (q, r uint) {
|
||||
return bits.Div(hi, lo, x)
|
||||
}
|
||||
|
||||
func Div32(hi, lo, x uint32) (q, r uint32) {
|
||||
// arm64:"ORR","UDIV","MSUB",-"UREM"
|
||||
return bits.Div32(hi, lo, x)
|
||||
}
|
||||
|
||||
func Div64(hi, lo, x uint64) (q, r uint64) {
|
||||
// amd64:"DIVQ"
|
||||
return bits.Div64(hi, lo, x)
|
||||
|
Loading…
Reference in New Issue
Block a user