From 9fd26872cb11e603fe91a927c22730061cc3ac0b Mon Sep 17 00:00:00 2001 From: Hector Chu Date: Sat, 17 Sep 2011 17:57:59 +1000 Subject: [PATCH] runtime: implement pprof support for windows Credit to jp for proof of concept. R=alex.brainman, jp, rsc, dvyukov CC=golang-dev https://golang.org/cl/4960057 --- src/pkg/runtime/runtime.h | 3 + src/pkg/runtime/windows/386/defs.h | 4 + src/pkg/runtime/windows/386/signal.c | 6 +- src/pkg/runtime/windows/386/sys.s | 43 +++++++--- src/pkg/runtime/windows/amd64/defs.h | 4 + src/pkg/runtime/windows/amd64/signal.c | 6 +- src/pkg/runtime/windows/amd64/sys.s | 40 ++++++--- src/pkg/runtime/windows/defs.c | 6 ++ src/pkg/runtime/windows/os.h | 4 +- src/pkg/runtime/windows/thread.c | 110 ++++++++++++++++++++++++- 10 files changed, 192 insertions(+), 34 deletions(-) diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index 25751b80e1..999511ac28 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -212,6 +212,7 @@ struct G uintptr sigcode1; uintptr sigpc; uintptr gopc; // pc of go statement that created this goroutine + uintptr end[]; }; struct M { @@ -253,9 +254,11 @@ struct M uint32 fflag; // floating point compare flags #ifdef __WINDOWS__ + void* thread; // thread handle void* event; // event for signalling M* nextwaitm; // next M waiting for lock #endif + uintptr end[]; }; struct Stktop diff --git a/src/pkg/runtime/windows/386/defs.h b/src/pkg/runtime/windows/386/defs.h index 49fc19504a..6cc5336a94 100644 --- a/src/pkg/runtime/windows/386/defs.h +++ b/src/pkg/runtime/windows/386/defs.h @@ -10,9 +10,13 @@ enum { PROT_EXEC = 0x4, MAP_ANON = 0x1, MAP_PRIVATE = 0x2, + DUPLICATE_SAME_ACCESS = 0x2, + THREAD_PRIORITY_HIGHEST = 0x2, SIGINT = 0x2, CTRL_C_EVENT = 0, CTRL_BREAK_EVENT = 0x1, + CONTEXT_CONTROL = 0x10001, + CONTEXT_FULL = 0x10007, EXCEPTION_ACCESS_VIOLATION = 0xc0000005, EXCEPTION_BREAKPOINT = 0x80000003, EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d, diff --git a/src/pkg/runtime/windows/386/signal.c b/src/pkg/runtime/windows/386/signal.c index cc6a2302ff..9c912ede49 100644 --- a/src/pkg/runtime/windows/386/signal.c +++ b/src/pkg/runtime/windows/386/signal.c @@ -90,9 +90,7 @@ runtime·sighandler(ExceptionRecord *info, void *frame, Context *r) } void -runtime·resetcpuprofiler(int32 hz) +runtime·dosigprof(Context *r, G *gp) { - // TODO: Enable profiling interrupts. - - m->profilehz = hz; + runtime·sigprof((uint8*)r->Eip, (uint8*)r->Esp, nil, gp); } diff --git a/src/pkg/runtime/windows/386/sys.s b/src/pkg/runtime/windows/386/sys.s index 2d41d858d9..95ae5336bf 100644 --- a/src/pkg/runtime/windows/386/sys.s +++ b/src/pkg/runtime/windows/386/sys.s @@ -96,31 +96,52 @@ TEXT runtime·sigtramp1(SB),0,$16-40 sigdone: RET -// Windows runs the ctrl handler in a new thread. TEXT runtime·ctrlhandler(SB),7,$0 + PUSHL $runtime·ctrlhandler1(SB) + CALL runtime·externalthreadhandler(SB) + MOVL 4(SP), CX + ADDL $12, SP + JMP CX + +TEXT runtime·profileloop(SB),7,$0 + PUSHL $runtime·profileloop1(SB) + CALL runtime·externalthreadhandler(SB) + MOVL 4(SP), CX + ADDL $12, SP + JMP CX + +TEXT runtime·externalthreadhandler(SB),7,$0 PUSHL BP MOVL SP, BP PUSHL BX PUSHL SI PUSHL DI PUSHL 0x2c(FS) - MOVL SP, BX + MOVL SP, DX // setup dummy m, g - SUBL $(m_fflag+4), SP // at least space for m_fflag + SUBL $m_end, SP // space for M + MOVL SP, 0(SP) + MOVL $m_end, 4(SP) + CALL runtime·memclr(SB) // smashes AX,BX,CX + LEAL m_tls(SP), CX MOVL CX, 0x2c(FS) MOVL SP, m(CX) - MOVL SP, DX - SUBL $8, SP // space for g_stack{guard,base} + MOVL SP, BX + SUBL $g_end, SP // space for G MOVL SP, g(CX) - MOVL SP, m_g0(DX) + MOVL SP, m_g0(BX) + + MOVL SP, 0(SP) + MOVL $g_end, 4(SP) + CALL runtime·memclr(SB) // smashes AX,BX,CX LEAL -4096(SP), CX MOVL CX, g_stackguard(SP) - MOVL BX, g_stackbase(SP) + MOVL DX, g_stackbase(SP) - PUSHL 8(BP) - CALL runtime·ctrlhandler1(SB) + PUSHL 16(BP) // arg for handler + CALL 8(BP) POPL CX get_tls(CX) @@ -131,9 +152,7 @@ TEXT runtime·ctrlhandler(SB),7,$0 POPL SI POPL BX POPL BP - MOVL 0(SP), CX - ADDL $8, SP - JMP CX + RET // Called from dynamic function created by ../thread.c compilecallback, // running on Windows stack (not Go stack). diff --git a/src/pkg/runtime/windows/amd64/defs.h b/src/pkg/runtime/windows/amd64/defs.h index 30c66df51c..d5191a3d74 100644 --- a/src/pkg/runtime/windows/amd64/defs.h +++ b/src/pkg/runtime/windows/amd64/defs.h @@ -10,9 +10,13 @@ enum { PROT_EXEC = 0x4, MAP_ANON = 0x1, MAP_PRIVATE = 0x2, + DUPLICATE_SAME_ACCESS = 0x2, + THREAD_PRIORITY_HIGHEST = 0x2, SIGINT = 0x2, CTRL_C_EVENT = 0, CTRL_BREAK_EVENT = 0x1, + CONTEXT_CONTROL = 0x100001, + CONTEXT_FULL = 0x10000b, EXCEPTION_ACCESS_VIOLATION = 0xc0000005, EXCEPTION_BREAKPOINT = 0x80000003, EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d, diff --git a/src/pkg/runtime/windows/amd64/signal.c b/src/pkg/runtime/windows/amd64/signal.c index 1e621b7607..97106c8b84 100644 --- a/src/pkg/runtime/windows/amd64/signal.c +++ b/src/pkg/runtime/windows/amd64/signal.c @@ -100,9 +100,7 @@ runtime·sighandler(ExceptionRecord *info, Context *r, G *gp) } void -runtime·resetcpuprofiler(int32 hz) +runtime·dosigprof(Context *r, G *gp) { - // TODO: Enable profiling interrupts. - - m->profilehz = hz; + runtime·sigprof((uint8*)r->Rip, (uint8*)r->Rsp, nil, gp); } diff --git a/src/pkg/runtime/windows/amd64/sys.s b/src/pkg/runtime/windows/amd64/sys.s index 3e50780dc9..113db2004b 100644 --- a/src/pkg/runtime/windows/amd64/sys.s +++ b/src/pkg/runtime/windows/amd64/sys.s @@ -100,31 +100,51 @@ TEXT runtime·sigtramp(SB),7,$56 sigdone: RET -// Windows runs the ctrl handler in a new thread. -TEXT runtime·ctrlhandler(SB),7,$0 +TEXT runtime·ctrlhandler(SB),7,$8 + MOVQ CX, 16(SP) // spill + MOVQ $runtime·ctrlhandler1(SB), CX + MOVQ CX, 0(SP) + CALL runtime·externalthreadhandler(SB) + RET + +TEXT runtime·profileloop(SB),7,$8 + MOVQ $runtime·profileloop1(SB), CX + MOVQ CX, 0(SP) + CALL runtime·externalthreadhandler(SB) + RET + +TEXT runtime·externalthreadhandler(SB),7,$0 PUSHQ BP MOVQ SP, BP PUSHQ BX PUSHQ SI PUSHQ DI PUSHQ 0x58(GS) - MOVQ SP, BX + MOVQ SP, DX // setup dummy m, g - SUBQ $(m_fflag+4), SP // at least space for m_fflag + SUBQ $m_end, SP // space for M + MOVQ SP, 0(SP) + MOVQ $m_end, 8(SP) + CALL runtime·memclr(SB) // smashes AX,BX,CX + LEAQ m_tls(SP), CX MOVQ CX, 0x58(GS) MOVQ SP, m(CX) - MOVQ SP, DX - SUBQ $16, SP // space for g_stack{guard,base} + MOVQ SP, BX + SUBQ $g_end, SP // space for G MOVQ SP, g(CX) - MOVQ SP, m_g0(DX) + MOVQ SP, m_g0(BX) + + MOVQ SP, 0(SP) + MOVQ $g_end, 8(SP) + CALL runtime·memclr(SB) // smashes AX,BX,CX LEAQ -8192(SP), CX MOVQ CX, g_stackguard(SP) - MOVQ BX, g_stackbase(SP) + MOVQ DX, g_stackbase(SP) - PUSHQ 16(BP) - CALL runtime·ctrlhandler1(SB) + PUSHQ 32(BP) // arg for handler + CALL 16(BP) POPQ CX get_tls(CX) diff --git a/src/pkg/runtime/windows/defs.c b/src/pkg/runtime/windows/defs.c index b076afd5dd..1b07dfbc19 100644 --- a/src/pkg/runtime/windows/defs.c +++ b/src/pkg/runtime/windows/defs.c @@ -17,10 +17,16 @@ enum { $MAP_ANON = 1, $MAP_PRIVATE = 2, + $DUPLICATE_SAME_ACCESS = DUPLICATE_SAME_ACCESS, + $THREAD_PRIORITY_HIGHEST = THREAD_PRIORITY_HIGHEST, + $SIGINT = SIGINT, $CTRL_C_EVENT = CTRL_C_EVENT, $CTRL_BREAK_EVENT = CTRL_BREAK_EVENT, + $CONTEXT_CONTROL = CONTEXT_CONTROL, + $CONTEXT_FULL = CONTEXT_FULL, + $EXCEPTION_ACCESS_VIOLATION = STATUS_ACCESS_VIOLATION, $EXCEPTION_BREAKPOINT = STATUS_BREAKPOINT, $EXCEPTION_FLT_DENORMAL_OPERAND = STATUS_FLOAT_DENORMAL_OPERAND, diff --git a/src/pkg/runtime/windows/os.h b/src/pkg/runtime/windows/os.h index 0ac5cbfd71..21277c64bc 100644 --- a/src/pkg/runtime/windows/os.h +++ b/src/pkg/runtime/windows/os.h @@ -13,8 +13,8 @@ extern void *runtime·GetProcAddress; void runtime·asmstdcall(void *c); void *runtime·stdcall(void *fn, int32 count, ...); -uintptr runtime·getlasterror(void); -void runtime·setlasterror(uintptr err); +uint32 runtime·getlasterror(void); +void runtime·setlasterror(uint32 err); // Function to be called by windows CreateThread // to start new os thread. diff --git a/src/pkg/runtime/windows/thread.c b/src/pkg/runtime/windows/thread.c index 33637f1d7a..97a42d73a0 100644 --- a/src/pkg/runtime/windows/thread.c +++ b/src/pkg/runtime/windows/thread.c @@ -10,32 +10,48 @@ #pragma dynimport runtime·CloseHandle CloseHandle "kernel32.dll" #pragma dynimport runtime·CreateEvent CreateEventA "kernel32.dll" #pragma dynimport runtime·CreateThread CreateThread "kernel32.dll" +#pragma dynimport runtime·CreateWaitableTimer CreateWaitableTimerA "kernel32.dll" +#pragma dynimport runtime·DuplicateHandle DuplicateHandle "kernel32.dll" #pragma dynimport runtime·ExitProcess ExitProcess "kernel32.dll" #pragma dynimport runtime·FreeEnvironmentStringsW FreeEnvironmentStringsW "kernel32.dll" #pragma dynimport runtime·GetEnvironmentStringsW GetEnvironmentStringsW "kernel32.dll" #pragma dynimport runtime·GetProcAddress GetProcAddress "kernel32.dll" #pragma dynimport runtime·GetStdHandle GetStdHandle "kernel32.dll" +#pragma dynimport runtime·GetThreadContext GetThreadContext "kernel32.dll" #pragma dynimport runtime·LoadLibraryEx LoadLibraryExA "kernel32.dll" #pragma dynimport runtime·QueryPerformanceCounter QueryPerformanceCounter "kernel32.dll" #pragma dynimport runtime·QueryPerformanceFrequency QueryPerformanceFrequency "kernel32.dll" +#pragma dynimport runtime·ResumeThread ResumeThread "kernel32.dll" #pragma dynimport runtime·SetConsoleCtrlHandler SetConsoleCtrlHandler "kernel32.dll" #pragma dynimport runtime·SetEvent SetEvent "kernel32.dll" +#pragma dynimport runtime·SetThreadPriority SetThreadPriority "kernel32.dll" +#pragma dynimport runtime·SetWaitableTimer SetWaitableTimer "kernel32.dll" +#pragma dynimport runtime·SuspendThread SuspendThread "kernel32.dll" +#pragma dynimport runtime·timeBeginPeriod timeBeginPeriod "winmm.dll" #pragma dynimport runtime·WaitForSingleObject WaitForSingleObject "kernel32.dll" #pragma dynimport runtime·WriteFile WriteFile "kernel32.dll" extern void *runtime·CloseHandle; extern void *runtime·CreateEvent; extern void *runtime·CreateThread; +extern void *runtime·CreateWaitableTimer; +extern void *runtime·DuplicateHandle; extern void *runtime·ExitProcess; extern void *runtime·FreeEnvironmentStringsW; extern void *runtime·GetEnvironmentStringsW; extern void *runtime·GetProcAddress; extern void *runtime·GetStdHandle; +extern void *runtime·GetThreadContext; extern void *runtime·LoadLibraryEx; extern void *runtime·QueryPerformanceCounter; extern void *runtime·QueryPerformanceFrequency; +extern void *runtime·ResumeThread; extern void *runtime·SetConsoleCtrlHandler; extern void *runtime·SetEvent; +extern void *runtime·SetThreadPriority; +extern void *runtime·SetWaitableTimer; +extern void *runtime·SuspendThread; +extern void *runtime·timeBeginPeriod; extern void *runtime·WaitForSingleObject; extern void *runtime·WriteFile; @@ -44,8 +60,13 @@ static int64 timerfreq; void runtime·osinit(void) { + // -1 = current process, -2 = current thread + runtime·stdcall(runtime·DuplicateHandle, 7, + (uintptr)-1, (uintptr)-2, (uintptr)-1, &m->thread, + (uintptr)0, (uintptr)0, (uintptr)DUPLICATE_SAME_ACCESS); runtime·stdcall(runtime·QueryPerformanceFrequency, 1, &timerfreq); runtime·stdcall(runtime·SetConsoleCtrlHandler, 2, runtime·ctrlhandler, (uintptr)1); + runtime·stdcall(runtime·timeBeginPeriod, 1, (uintptr)1); } void @@ -211,11 +232,13 @@ runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) USED(g); // assuming g = m->g0 USED(fn); // assuming fn = mstart - thandle = runtime·stdcall(runtime·CreateThread, 6, (uintptr)0, (uintptr)0, runtime·tstart_stdcall, m, (uintptr)0, (uintptr)0); - if(thandle == 0) { + thandle = runtime·stdcall(runtime·CreateThread, 6, + nil, nil, runtime·tstart_stdcall, m, nil, nil); + if(thandle == nil) { runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror()); runtime·throw("runtime.newosproc"); } + runtime·atomicstorep(&m->thread, thandle); } // Called to initialize a new m (including the bootstrap m). @@ -324,6 +347,89 @@ runtime·ctrlhandler1(uint32 type) return 0; } +extern void runtime·dosigprof(Context *r, G *gp); +extern void runtime·profileloop(void); +static void *profiletimer; + +static void +profilem(M *mp) +{ + extern M runtime·m0; + extern uint32 runtime·tls0[]; + byte rbuf[sizeof(Context)+15]; + Context *r; + void *tls; + G *gp; + + tls = mp->tls; + if(mp == &runtime·m0) + tls = runtime·tls0; + gp = *(G**)tls; + + if(gp != nil && gp != mp->g0 && gp->status != Gsyscall) { + // align Context to 16 bytes + r = (Context*)((uintptr)(&rbuf[15]) & ~15); + r->ContextFlags = CONTEXT_CONTROL; + runtime·stdcall(runtime·GetThreadContext, 2, mp->thread, r); + runtime·dosigprof(r, gp); + } +} + +void +runtime·profileloop1(void) +{ + M *mp, *allm; + void *thread; + + runtime·stdcall(runtime·SetThreadPriority, 2, + (uintptr)-2, (uintptr)THREAD_PRIORITY_HIGHEST); + + for(;;) { + runtime·stdcall(runtime·WaitForSingleObject, 2, profiletimer, (uintptr)-1); + allm = runtime·atomicloadp(&runtime·allm); + for(mp = allm; mp != nil; mp = mp->alllink) { + thread = runtime·atomicloadp(&mp->thread); + if(thread == nil) + continue; + runtime·stdcall(runtime·SuspendThread, 1, thread); + if(mp->profilehz != 0) + profilem(mp); + runtime·stdcall(runtime·ResumeThread, 1, thread); + } + } +} + +void +runtime·resetcpuprofiler(int32 hz) +{ + static Lock lock; + void *timer, *thread; + int32 ms; + int64 due; + + runtime·lock(&lock); + if(profiletimer == nil) { + timer = runtime·stdcall(runtime·CreateWaitableTimer, 3, nil, nil, nil); + runtime·atomicstorep(&profiletimer, timer); + thread = runtime·stdcall(runtime·CreateThread, 6, + nil, nil, runtime·profileloop, nil, nil, nil); + runtime·stdcall(runtime·CloseHandle, 1, thread); + } + runtime·unlock(&lock); + + ms = 0; + due = 1LL<<63; + if(hz > 0) { + ms = 1000 / hz; + if(ms == 0) + ms = 1; + due = ms * -10000; + } + runtime·stdcall(runtime·SetWaitableTimer, 6, + profiletimer, &due, (uintptr)ms, nil, nil, nil); + runtime·atomicstore((uint32*)&m->profilehz, hz); +} + void os·sigpipe(void) {