cmd/compile: remove bit operations that modify memory directly

These operations (BT{S,R,C}{Q,L}modify) are quite a bit slower than
other ways of doing the same thing.

Without the BTxmodify operations, there are two fallback ways the compiler
can perform these operations: AND/OR/XOR operations directly on memory, or
load-BTx-write sequences. Which one the compiler picks is more or less
arbitrary; it depends on rewrite rule application order. Currently, it uses
load-BTx-write for the Const benchmarks and AND/OR/XOR directly to memory
for the non-Const benchmarks. Which of the two fallback strategies is
really better remains to be investigated. For now, both are faster than
the BTxmodify ops.

name              old time/op  new time/op  delta
BitSet-8          1.09µs ± 2%  0.64µs ± 5%  -41.60%  (p=0.000 n=9+10)
BitClear-8        1.15µs ± 3%  0.68µs ± 6%  -41.00%  (p=0.000 n=10+10)
BitToggle-8       1.18µs ± 4%  0.73µs ± 2%  -38.36%  (p=0.000 n=10+8)
BitSetConst-8     37.0ns ± 7%  25.8ns ± 2%  -30.24%  (p=0.000 n=10+10)
BitClearConst-8   30.7ns ± 2%  25.0ns ±12%  -18.46%  (p=0.000 n=10+10)
BitToggleConst-8  36.9ns ± 1%  23.8ns ± 3%  -35.46%  (p=0.000 n=9+10)

Fixes #45790
Update #45242

Change-Id: Ie33a72dc139f261af82db15d446cd0855afb4e59
Reviewed-on: https://go-review.googlesource.com/c/go/+/318149
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Ben Shi <powerman1st@163.com>
This commit is contained in:
Keith Randall 2021-05-07 14:14:39 -07:00 committed by Ben Shi
parent f24eac4771
commit b211fe0058
7 changed files with 81 additions and 1385 deletions

View File

@ -756,7 +756,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
p := s.Prog(v.Op.Asm())
@ -804,8 +803,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff()
off := sc.Off64()
val := sc.Val64()

View File

@ -624,14 +624,6 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
(ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
(BTSLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
(BTSQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
(XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
(BTCLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
(BTCQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@ -654,10 +646,6 @@
=> (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
(ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) <t> x)) mem) =>
(BTRLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) <t> x)) mem) =>
(BTRQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@ -1126,14 +1114,14 @@
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) =>
@ -1181,18 +1169,18 @@
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
@ -2078,13 +2066,9 @@
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((BTC|BTR|BTS)Lmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((BTC|BTR|BTS)Qmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
@ -2138,12 +2122,12 @@
(MOVWQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.

View File

@ -362,25 +362,6 @@ func init() {
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
// direct bit operation on memory operand
//
// Note that these operations do not mask the bit offset (arg1), and will write beyond their expected
// bounds if that argument is larger than 64/32 (for BT*Q and BT*L, respectively). If the compiler
// cannot prove that arg1 is in range, it must be explicitly masked (see e.g. the patterns that produce
// BT*modify from (MOVstore (BT* (MOVLload ptr mem) x) mem)).
{name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0

View File

@ -692,18 +692,6 @@ const (
OpAMD64BTRQconst
OpAMD64BTSLconst
OpAMD64BTSQconst
OpAMD64BTCQmodify
OpAMD64BTCLmodify
OpAMD64BTSQmodify
OpAMD64BTSLmodify
OpAMD64BTRQmodify
OpAMD64BTRLmodify
OpAMD64BTCQconstmodify
OpAMD64BTCLconstmodify
OpAMD64BTSQconstmodify
OpAMD64BTSLconstmodify
OpAMD64BTRQconstmodify
OpAMD64BTRLconstmodify
OpAMD64TESTQ
OpAMD64TESTL
OpAMD64TESTW
@ -8521,180 +8509,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTCQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "TESTQ",
argLen: 2,

File diff suppressed because it is too large Load Diff

View File

@ -62,3 +62,63 @@ func BenchmarkConstModify(b *testing.B) {
}
}
}
// BenchmarkBitSet measures setting a single bit chosen by a variable index.
// Each pass walks all 512 bit positions of an 8-word bitmap and ORs the
// matching one-bit mask into the word that holds that position.
func BenchmarkBitSet(b *testing.B) {
	const (
		wordBits = 64
		numWords = 8
		numBits  = wordBits * numWords
	)
	bitmap := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for bit := uint64(0); bit < numBits; bit++ {
			bitmap[bit/wordBits] |= 1 << (bit % wordBits)
		}
	}
}
// BenchmarkBitClear measures clearing a single bit chosen by a variable index.
// Each pass walks all 512 bit positions of an 8-word bitmap and AND-NOTs the
// matching one-bit mask out of the word that holds that position.
func BenchmarkBitClear(b *testing.B) {
	const (
		wordBits = 64
		numWords = 8
		numBits  = wordBits * numWords
	)
	bitmap := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for bit := uint64(0); bit < numBits; bit++ {
			bitmap[bit/wordBits] &^= 1 << (bit % wordBits)
		}
	}
}
// BenchmarkBitToggle measures flipping a single bit chosen by a variable index.
// Each pass walks all 512 bit positions of an 8-word bitmap and XORs the
// matching one-bit mask into the word that holds that position.
func BenchmarkBitToggle(b *testing.B) {
	const (
		wordBits = 64
		numWords = 8
		numBits  = wordBits * numWords
	)
	bitmap := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for bit := uint64(0); bit < numBits; bit++ {
			bitmap[bit/wordBits] ^= 1 << (bit % wordBits)
		}
	}
}
// BenchmarkBitSetConst measures setting a fixed bit (bit 37) in memory.
// Each pass ORs the constant mask into every element of a 64-word slice.
func BenchmarkBitSetConst(b *testing.B) {
	const (
		numWords = 64
		mask     = 1 << 37
	)
	words := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for w := range words {
			words[w] |= mask
		}
	}
}
// BenchmarkBitClearConst measures clearing a fixed bit (bit 37) in memory.
// Each pass AND-NOTs the constant mask out of every element of a 64-word slice.
func BenchmarkBitClearConst(b *testing.B) {
	const (
		numWords = 64
		mask     = 1 << 37
	)
	words := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for w := range words {
			words[w] &^= mask
		}
	}
}
// BenchmarkBitToggleConst measures flipping a fixed bit (bit 37) in memory.
// Each pass XORs the constant mask into every element of a 64-word slice.
func BenchmarkBitToggleConst(b *testing.B) {
	const (
		numWords = 64
		mask     = 1 << 37
	)
	words := make([]uint64, numWords)
	for pass := b.N; pass > 0; pass-- {
		for w := range words {
			words[w] ^= mask
		}
	}
}

View File

@ -270,18 +270,6 @@ func bitOpOnMem(a []uint32, b, c, d uint32) {
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
a[2] ^= 240
// amd64:`BTRL\s[$]15,\s12\([A-Z][A-Z0-9]+\)`,-`ANDL`
a[3] &= 0xffff7fff
// amd64:`BTSL\s[$]14,\s16\([A-Z][A-Z0-9]+\)`,-`ORL`
a[4] |= 0x4000
// amd64:`BTCL\s[$]13,\s20\([A-Z][A-Z0-9]+\)`,-`XORL`
a[5] ^= 0x2000
// amd64:`BTRL\s[A-Z][A-Z0-9]+,\s24\([A-Z][A-Z0-9]+\)`
a[6] &^= 1 << (b & 31)
// amd64:`BTSL\s[A-Z][A-Z0-9]+,\s28\([A-Z][A-Z0-9]+\)`
a[7] |= 1 << (c & 31)
// amd64:`BTCL\s[A-Z][A-Z0-9]+,\s32\([A-Z][A-Z0-9]+\)`
a[8] ^= 1 << (d & 31)
}
func bitcheckMostNegative(b uint8) bool {