runtime: support register ABI Go functions from Windows callbacks

This change modifies the system that allows Go functions to be set as
callbacks in various Windows systems to support the new register ABI.

For #40724.

Change-Id: Ie067f9e8a76c96d56177d7aa88f89cbe7223e12e
Reviewed-on: https://go-review.googlesource.com/c/go/+/300113
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
Michael Anthony Knyszek 2021-03-09 21:13:34 +00:00 committed by Michael Knyszek
parent cb42e3e979
commit 44dd06670f
3 changed files with 306 additions and 82 deletions

View File

@ -1233,7 +1233,9 @@ func (th *TimeHistogram) Record(duration int64) {
func SetIntArgRegs(a int) int {
lock(&finlock)
old := intArgRegs
intArgRegs = a
if a >= 0 {
intArgRegs = a
}
unlock(&finlock)
return old
}

View File

@ -22,30 +22,178 @@ var cbs struct {
type winCallback struct {
fn *funcval // Go function
retPop uintptr // For 386 cdecl, how many bytes to pop on return
abiMap abiDesc
}
// abiPartKind is the action an abiPart should take.
type abiPartKind int
const (
abiPartBad abiPartKind = iota
abiPartStack // Move a value from memory to the stack.
abiPartReg // Move a value from memory to a register.
)
// abiPart encodes a step in translating between calling ABIs.
type abiPart struct {
kind abiPartKind
srcStackOffset uintptr
dstStackOffset uintptr // used if kind == abiPartStack
dstRegister int // used if kind == abiPartReg
len uintptr
}
func (a *abiPart) tryMerge(b abiPart) bool {
if a.kind != abiPartStack || b.kind != abiPartStack {
return false
}
if a.srcStackOffset+a.len == b.srcStackOffset && a.dstStackOffset+a.len == b.dstStackOffset {
a.len += b.len
return true
}
return false
}
// abiDesc specifies how to translate from a C frame to a Go
// frame. This does not specify how to translate back because
// the result is always a uintptr. If the C ABI is fastcall,
// this assumes the four fastcall registers were first spilled
// to the shadow space.
type abiDesc struct {
parts []abiPart
srcStackSize uintptr // stdcall/fastcall stack space tracking
dstStackSize uintptr // Go stack space used
dstRegisters int // Go ABI int argument registers used
// abiMap specifies how to translate from a C frame to a Go
// frame. This does not specify how to translate back because
// the result is always a uintptr. If the C ABI is fastcall,
// this assumes the four fastcall registers were first spilled
// to the shadow space.
abiMap []abiPart
// retOffset is the offset of the uintptr-sized result in the Go
// frame.
retOffset uintptr
}
// abiPart encodes a step in translating between calling ABIs.
type abiPart struct {
src, dst uintptr
len uintptr
func (p *abiDesc) assignArg(t *_type) {
if t.size > sys.PtrSize {
// We don't support this right now. In
// stdcall/cdecl, 64-bit ints and doubles are
// passed as two words (little endian); and
// structs are pushed on the stack. In
// fastcall, arguments larger than the word
// size are passed by reference. On arm,
// 8-byte aligned arguments round up to the
// next even register and can be split across
// registers and the stack.
panic("compileCallback: argument size is larger than uintptr")
}
if k := t.kind & kindMask; GOARCH != "386" && (k == kindFloat32 || k == kindFloat64) {
// In fastcall, floating-point arguments in
// the first four positions are passed in
// floating-point registers, which we don't
// currently spill. arm passes floating-point
// arguments in VFP registers, which we also
// don't support.
// So basically we only support 386.
panic("compileCallback: float arguments not supported")
}
if t.size == 0 {
// The Go ABI aligns for zero-sized types.
p.dstStackSize = alignUp(p.dstStackSize, uintptr(t.align))
return
}
// In the C ABI, we're already on a word boundary.
// Also, sub-word-sized fastcall register arguments
// are stored to the least-significant bytes of the
// argument word and all supported Windows
// architectures are little endian, so srcStackOffset
// is already pointing to the right place for smaller
// arguments. The same is true on arm.
oldParts := p.parts
if !p.tryRegAssignArg(t, 0) {
// Register assignment failed.
// Undo the work and stack assign.
p.parts = oldParts
// The Go ABI aligns arguments.
p.dstStackSize = alignUp(p.dstStackSize, uintptr(t.align))
// Copy just the size of the argument. Note that this
// could be a small by-value struct, but C and Go
// struct layouts are compatible, so we can copy these
// directly, too.
part := abiPart{
kind: abiPartStack,
srcStackOffset: p.srcStackSize,
dstStackOffset: p.dstStackSize,
len: t.size,
}
// Add this step to the adapter.
if len(p.parts) == 0 || !p.parts[len(p.parts)-1].tryMerge(part) {
p.parts = append(p.parts, part)
}
// The Go ABI packs arguments.
p.dstStackSize += t.size
}
// cdecl, stdcall, fastcall, and arm pad arguments to word size.
// TODO(rsc): On arm and arm64 do we need to skip the caller's saved LR?
p.srcStackSize += sys.PtrSize
}
func (a *abiPart) tryMerge(b abiPart) bool {
if a.src+a.len == b.src && a.dst+a.len == b.dst {
a.len += b.len
// tryRegAssignArg tries to register-assign a value of type t.
// If this type is nested in an aggregate type, then offset is the
// offset of this type within its parent type.
// Assumes t.size <= sys.PtrSize and t.size != 0.
//
// Returns whether the assignment succeeded.
func (p *abiDesc) tryRegAssignArg(t *_type, offset uintptr) bool {
switch k := t.kind & kindMask; k {
case kindBool, kindInt, kindInt8, kindInt16, kindInt32, kindUint, kindUint8, kindUint16, kindUint32, kindUintptr, kindPtr, kindUnsafePointer:
// Assign a register for all these types.
return p.assignReg(t.size, offset)
case kindInt64, kindUint64:
// Only register-assign if the registers are big enough.
if sys.PtrSize == 8 {
return p.assignReg(t.size, offset)
}
case kindArray:
at := (*arraytype)(unsafe.Pointer(t))
if at.len == 1 {
return p.tryRegAssignArg(at.elem, offset)
}
case kindStruct:
st := (*structtype)(unsafe.Pointer(t))
for i := range st.fields {
f := &st.fields[i]
if !p.tryRegAssignArg(f.typ, offset+f.offset()) {
return false
}
}
return true
}
return false
// Pointer-sized types such as maps and channels are currently
// not supported.
panic("compileCallabck: type " + t.string() + " is currently not supported for use in system callbacks")
}
// assignReg attempts to assign a single register for an
// argument with the given size, at the given offset into the
// value in the C ABI space.
//
// Returns whether the assignment was successful.
func (p *abiDesc) assignReg(size, offset uintptr) bool {
if p.dstRegisters >= intArgRegs {
return false
}
p.parts = append(p.parts, abiPart{
kind: abiPartReg,
srcStackOffset: p.srcStackSize + offset,
dstRegister: p.dstRegisters,
len: size,
})
p.dstRegisters++
return true
}
type winCallbackKey struct {
@ -101,62 +249,14 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) {
ft := (*functype)(unsafe.Pointer(fn._type))
// Check arguments and construct ABI translation.
var abiMap []abiPart
var src, dst uintptr
var abiMap abiDesc
for _, t := range ft.in() {
if t.size > sys.PtrSize {
// We don't support this right now. In
// stdcall/cdecl, 64-bit ints and doubles are
// passed as two words (little endian); and
// structs are pushed on the stack. In
// fastcall, arguments larger than the word
// size are passed by reference. On arm,
// 8-byte aligned arguments round up to the
// next even register and can be split across
// registers and the stack.
panic("compileCallback: argument size is larger than uintptr")
}
if k := t.kind & kindMask; GOARCH != "386" && (k == kindFloat32 || k == kindFloat64) {
// In fastcall, floating-point arguments in
// the first four positions are passed in
// floating-point registers, which we don't
// currently spill. arm passes floating-point
// arguments in VFP registers, which we also
// don't support.
// So basically we only support 386.
panic("compileCallback: float arguments not supported")
}
// The Go ABI aligns arguments.
dst = alignUp(dst, uintptr(t.align))
// In the C ABI, we're already on a word boundary.
// Also, sub-word-sized fastcall register arguments
// are stored to the least-significant bytes of the
// argument word and all supported Windows
// architectures are little endian, so src is already
// pointing to the right place for smaller arguments.
// The same is true on arm.
// Copy just the size of the argument. Note that this
// could be a small by-value struct, but C and Go
// struct layouts are compatible, so we can copy these
// directly, too.
part := abiPart{src, dst, t.size}
// Add this step to the adapter.
if len(abiMap) == 0 || !abiMap[len(abiMap)-1].tryMerge(part) {
abiMap = append(abiMap, part)
}
// cdecl, stdcall, fastcall, and arm pad arguments to word size.
// TODO(rsc): On arm and arm64 do we need to skip the caller's saved LR?
src += sys.PtrSize
// The Go ABI packs arguments.
dst += t.size
abiMap.assignArg(t)
}
// The Go ABI aligns the result to the word size. src is
// already aligned.
dst = alignUp(dst, sys.PtrSize)
retOffset := dst
abiMap.dstStackSize = alignUp(abiMap.dstStackSize, sys.PtrSize)
abiMap.retOffset = abiMap.dstStackSize
if len(ft.out()) != 1 {
panic("compileCallback: expected function with one uintptr-sized result")
@ -170,10 +270,14 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) {
// Either way, it's not AX.
panic("compileCallback: float results not supported")
}
// Make room for the uintptr-sized result.
dst += sys.PtrSize
if intArgRegs == 0 {
// Make room for the uintptr-sized result.
// If there are argument registers, the return value will
// be passed in the first register.
abiMap.dstStackSize += sys.PtrSize
}
if dst > callbackMaxFrame {
if abiMap.dstStackSize > callbackMaxFrame {
panic("compileCallback: function argument frame too large")
}
@ -181,7 +285,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) {
// arguments from the C stack.
var retPop uintptr
if cdecl {
retPop = src
retPop = abiMap.srcStackSize
}
key := winCallbackKey{(*funcval)(fn.data), cdecl}
@ -203,7 +307,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) {
unlock(&cbs.lock)
throw("too many callback functions")
}
c := winCallback{key.fn, retPop, abiMap, retOffset}
c := winCallback{key.fn, retPop, abiMap}
cbs.ctxt[n] = c
cbs.index[key] = n
cbs.n++
@ -237,22 +341,39 @@ func callbackWrap(a *callbackArgs) {
a.retPop = c.retPop
// Convert from C to Go ABI.
var regs abi.RegArgs
var frame [callbackMaxFrame]byte
goArgs := unsafe.Pointer(&frame)
for _, part := range c.abiMap {
memmove(add(goArgs, part.dst), add(a.args, part.src), part.len)
for _, part := range c.abiMap.parts {
switch part.kind {
case abiPartStack:
memmove(add(goArgs, part.dstStackOffset), add(a.args, part.srcStackOffset), part.len)
case abiPartReg:
goReg := unsafe.Pointer(&regs.Ints[part.dstRegister])
memmove(goReg, add(a.args, part.srcStackOffset), part.len)
default:
panic("bad ABI description")
}
}
// Even though this is copying back results, we can pass a nil
// type because those results must not require write barriers.
//
// Pass a dummy RegArgs for now.
// TODO(mknyszek): Pass arguments in registers.
var regs abi.RegArgs
reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.retOffset)+sys.PtrSize, uint32(c.retOffset), uint32(c.retOffset)+sys.PtrSize, &regs)
reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.abiMap.dstStackSize), uint32(c.abiMap.retOffset), uint32(c.abiMap.dstStackSize), &regs)
// Extract the result.
a.result = *(*uintptr)(unsafe.Pointer(&frame[c.retOffset]))
//
// There's always exactly one return value, one pointer in size.
// If it's on the stack, then we will have reserved space for it
// at the end of the frame, otherwise it was passed in a register.
if c.abiMap.dstStackSize != c.abiMap.retOffset {
a.result = *(*uintptr)(unsafe.Pointer(&frame[c.abiMap.retOffset]))
} else {
var zero int
// On architectures with no registers, Ints[0] would be a compile error,
// so we use a dynamic index. These architectures will never take this
// branch, so this won't cause a runtime panic.
a.result = regs.Ints[zero]
}
}
const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800

View File

@ -7,6 +7,7 @@ package runtime_test
import (
"bytes"
"fmt"
"internal/abi"
"internal/syscall/windows/sysdll"
"internal/testenv"
"io"
@ -390,6 +391,103 @@ var cbFuncs = []cbFunc{
}},
}
//go:registerparams
func sum2(i1, i2 uintptr) uintptr {
return i1 + i2
}
//go:registerparams
func sum3(i1, i2, i3 uintptr) uintptr {
return i1 + i2 + i3
}
//go:registerparams
func sum4(i1, i2, i3, i4 uintptr) uintptr {
return i1 + i2 + i3 + i4
}
//go:registerparams
func sum5(i1, i2, i3, i4, i5 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5
}
//go:registerparams
func sum6(i1, i2, i3, i4, i5, i6 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5 + i6
}
//go:registerparams
func sum7(i1, i2, i3, i4, i5, i6, i7 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5 + i6 + i7
}
//go:registerparams
func sum8(i1, i2, i3, i4, i5, i6, i7, i8 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8
}
//go:registerparams
func sum9(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9
}
//go:registerparams
func sum10(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10 uintptr) uintptr {
return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10
}
//go:registerparams
func sum9uint8(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint8) uintptr {
return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9)
}
//go:registerparams
func sum9uint16(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint16) uintptr {
return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9)
}
//go:registerparams
func sum9int8(i1, i2, i3, i4, i5, i6, i7, i8, i9 int8) uintptr {
return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9)
}
//go:registerparams
func sum5mix(i1 int8, i2 int16, i3 int32, i4, i5 uintptr) uintptr {
return uintptr(i1) + uintptr(i2) + uintptr(i3) + i4 + i5
}
//go:registerparams
func sum5andPair(i1, i2, i3, i4, i5 uint8Pair) uintptr {
return uintptr(i1.x + i1.y + i2.x + i2.y + i3.x + i3.y + i4.x + i4.y + i5.x + i5.y)
}
// TODO(register args): Remove this once we switch to using the register
// calling convention by default, since this is redundant with the existing
// tests.
var cbFuncsRegABI = []cbFunc{
{sum2},
{sum3},
{sum4},
{sum5},
{sum6},
{sum7},
{sum8},
{sum9},
{sum10},
{sum9uint8},
{sum9uint16},
{sum9int8},
{sum5mix},
{sum5andPair},
}
func getCallbackTestFuncs() []cbFunc {
if regs := runtime.SetIntArgRegs(-1); regs > 0 {
return cbFuncsRegABI
}
return cbFuncs
}
type cbDLL struct {
name string
buildArgs func(out, src string) []string
@ -406,7 +504,7 @@ func (d *cbDLL) makeSrc(t *testing.T, path string) {
#include <stdint.h>
typedef struct { uint8_t x, y; } uint8Pair_t;
`)
for _, cbf := range cbFuncs {
for _, cbf := range getCallbackTestFuncs() {
cbf.cSrc(f, false)
cbf.cSrc(f, true)
}
@ -451,12 +549,15 @@ func TestStdcallAndCDeclCallbacks(t *testing.T) {
}
defer os.RemoveAll(tmp)
oldRegs := runtime.SetIntArgRegs(abi.IntArgRegs)
defer runtime.SetIntArgRegs(oldRegs)
for _, dll := range cbDLLs {
t.Run(dll.name, func(t *testing.T) {
dllPath := dll.build(t, tmp)
dll := syscall.MustLoadDLL(dllPath)
defer dll.Release()
for _, cbf := range cbFuncs {
for _, cbf := range getCallbackTestFuncs() {
t.Run(cbf.cName(false), func(t *testing.T) {
stdcall := syscall.NewCallback(cbf.goFunc)
cbf.testOne(t, dll, false, stdcall)