go/test/codegen/shift.go
Michael Munday 854e892ce1 cmd/compile: optimize shift pairs and masks on s390x
Optimize combinations of left and right shifts by a constant value
into a 'rotate then insert selected bits [into zero]' instruction.
Use the same instruction for contiguous masks since it has some
benefits over 'and immediate' (not restricted to 32-bits, does not
overwrite source register).

To keep the complexity of this change under control I've only
implemented 64 bit operations for now.

There are a lot more optimizations that can be done with this
instruction family. However, since their function overlaps with other
instructions we need to be somewhat careful not to break existing
optimization rules by creating optimization dead ends. This is
particularly true of the load/store merging rules which contain lots
of zero extensions and shifts.

This CL does interfere with the store merging rules when an operand
is shifted left before it is stored:

  binary.BigEndian.PutUint64(b, x << 1)

This is unfortunate but it's not critical and somewhat complex so
I plan to fix that in a follow up CL.

file      before    after     Δ       %
addr2line 4117446   4117282   -164    -0.004%
api       4945184   4942752   -2432   -0.049%
asm       4998079   4991891   -6188   -0.124%
buildid   2685158   2684074   -1084   -0.040%
cgo       4553732   4553394   -338    -0.007%
compile   19294446  19245070  -49376  -0.256%
cover     4897105   4891319   -5786   -0.118%
dist      3544389   3542785   -1604   -0.045%
doc       3926795   3927617   +822    +0.021%
fix       3302958   3293868   -9090   -0.275%
link      6546274   6543456   -2818   -0.043%
nm        4102021   4100825   -1196   -0.029%
objdump   4542431   4548483   +6052   +0.133%
pack      2482465   2416389   -66076  -2.662%
pprof     13366541  13363915  -2626   -0.020%
test2json 2829007   2761515   -67492  -2.386%
trace     10216164  10219684  +3520   +0.034%
vet       6773956   6773572   -384    -0.006%
total     107124151 106917891 -206260 -0.193%

Change-Id: I7591cce41e06867ba10a745daae9333513062746
Reviewed-on: https://go-review.googlesource.com/c/go/+/233317
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Michael Munday <mike.munday@ibm.com>
2020-11-06 10:45:31 +00:00

291 lines
7.4 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// ------------------ //
// masked shifts //
// ------------------ //
func lshMask64x64(v int64, s uint64) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << (s & 63)
}
func rshMask64Ux64(v uint64, s uint64) uint64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func rshMask64x64(v int64, s uint64) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask32x64(v int32, s uint64) int32 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v << (s & 63)
}
func rshMask32Ux64(v uint32, s uint64) uint32 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func rshMask32x64(v int32, s uint64) int32 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func lshMask64x32(v int64, s uint32) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v << (s & 63)
}
func rshMask64Ux32(v uint64, s uint32) uint64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v >> (s & 63)
}
func rshMask64x32(v int64, s uint32) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask64x32Ext(v int64, s int32) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << uint(s&63)
}
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
func rshMask64x32Ext(v int64, s int32) int64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
// --------------- //
// signed shifts //
// --------------- //
// We do want to generate a test + panicshift for these cases.
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
// amd64:"TESTB"
_ = x << v8
// amd64:"TESTW"
_ = x << v16
// amd64:"TESTL"
_ = x << v32
// amd64:"TESTQ"
_ = x << v64
}
// We want to avoid generating a test + panicshift for these cases.
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
// amd64:-"TESTB"
_ = x << (v8 & 7)
// amd64:-"TESTW"
_ = x << (v16 & 15)
// amd64:-"TESTL"
_ = x << (v32 & 31)
// amd64:-"TESTQ"
_ = x << (v64 & 63)
}
// ------------------ //
// bounded shifts //
// ------------------ //
func rshGuarded64(v int64, s uint) int64 {
if s < 64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// wasm:-"Select",-".*LtU"
return v >> s
}
panic("shift too large")
}
func rshGuarded64U(v uint64, s uint) uint64 {
if s < 64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// wasm:-"Select",-".*LtU"
return v >> s
}
panic("shift too large")
}
func lshGuarded64(v int64, s uint) int64 {
if s < 64 {
// s390x:-"RISBGZ",-"AND",-"LOCGR"
// wasm:-"Select",-".*LtU"
return v << s
}
panic("shift too large")
}
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f := tab[byte(v)^b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)&b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)|b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)&h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)^h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)|h]
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
f += tab[v&0xff]
// ppc64le:-".*AND",".*CLRLSLWI"
// ppc64:-".*AND",".*CLRLSLWI"
f += 2 * uint32(uint16(d))
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
g := 2 * uint64(uint32(d))
return f, g
}
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
f := (v8 & 0xF) << 2
// ppc64le:"CLRLSLWI"
// ppc64:"CLRLSLWI"
f += byte(v16) << 3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
g := (v16 & 0xFF) << 3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
h := (v32 & 0xFFFFF) << 2
// ppc64le:"CLRLSLDI"
// ppc64:"CLRLSLDI"
i := (v64 & 0xFFFFFFFF) << 5
// ppc64le:-"CLRLSLDI"
// ppc64:-"CLRLSLDI"
i += (v64 & 0xFFFFFFF) << 38
// ppc64le/power9:-"CLRLSLDI"
// ppc64/power9:-"CLRLSLDI"
i += (v64 & 0xFFFF00) << 10
// ppc64le/power9:-"SLD","EXTSWSLI"
// ppc64/power9:-"SLD","EXTSWSLI"
j := int64(x32+32) * 8
return f, g, h, i, j
}
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
// ppc64le:-".*MOVW"
f := int32(v >> 32)
// ppc64le:".*MOVW"
f += int32(v >> 31)
// ppc64le:-".*MOVH"
g := int16(v >> 48)
// ppc64le:".*MOVH"
g += int16(v >> 30)
// ppc64le:-".*MOVH"
g += int16(f >> 16)
// ppc64le:-".*MOVB"
h := int8(v >> 56)
// ppc64le:".*MOVB"
h += int8(v >> 28)
// ppc64le:-".*MOVB"
h += int8(f >> 24)
// ppc64le:".*MOVB"
h += int8(f >> 16)
return int64(h), uint64(g)
}
func checkShiftAndMask32(v []uint32) {
i := 0
// ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
// ppc64: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
v[i] = (v[i] & 0xFF00000) >> 8
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
v[i] = (v[i] & 0xFF00) >> 6
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] & 0xFF) >> 8
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] & 0xF000000) >> 28
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
v[i] = (v[i] >> 6) & 0xFF
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
v[i] = (v[i] >> 6) & 0xFF000
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] >> 20) & 0xFF000
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] >> 24) & 0xFF00
i++
}
func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
a[0] = a[uint8(v>>24)]
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
b[0] = b[uint8(v>>24)]
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
b[1] = b[(v>>20)&0xFF]
//ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
//ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
b[2] = b[v>>25]
}