mirror of
https://github.com/golang/go.git
synced 2024-09-22 02:48:50 +00:00
runtime: fix (sometimes major) underestimation of heap_live
Currently, we update memstats.heap_live from mcache.local_cachealloc whenever we lock the heap (e.g., to obtain a fresh span or to release an unused span). However, under the right circumstances, local_cachealloc can accumulate allocations up to the size of the *entire heap* without flushing them to heap_live. Specifically, since span allocations from an mcentral don't lock the heap, if a large number of pages are held in an mcentral and the application continues to use and free objects of that size class (e.g., the BinaryTree17 benchmark), local_cachealloc won't be flushed until the mcentral runs out of spans. This is a problem because, unlike many of the memory statistics that are purely informative, heap_live is used to determine when the garbage collector should start and how hard it should work. This commit eliminates local_cachealloc, instead atomically updating heap_live directly. To control contention, we do this only when obtaining a span from an mcentral. Furthermore, we make heap_live conservative: allocating a span assumes that all free slots in that span will be used and accounts for these when the span is allocated, *before* the objects themselves are. This is important because 1) this triggers the GC earlier than necessary rather than potentially too late and 2) this leads to a conservative GC rate rather than a GC rate that is potentially too low. Alternatively, we could have flushed local_cachealloc when it passed some threshold, but this would require determining a threshold and would cause heap_live to underestimate the true value rather than overestimate. Fixes #12199. name old time/op new time/op delta BinaryTree17-12 2.88s ± 4% 2.88s ± 1% ~ (p=0.470 n=19+19) Fannkuch11-12 2.48s ± 1% 2.48s ± 1% ~ (p=0.243 n=16+19) FmtFprintfEmpty-12 50.9ns ± 2% 50.7ns ± 1% ~ (p=0.238 n=15+14) FmtFprintfString-12 175ns ± 1% 171ns ± 1% -2.48% (p=0.000 n=18+18) FmtFprintfInt-12 159ns ± 1% 158ns ± 1% -0.78% (p=0.000 n=19+18) FmtFprintfIntInt-12 270ns ± 1% 265ns ± 2% -1.67% (p=0.000 n=18+18) FmtFprintfPrefixedInt-12 235ns ± 1% 234ns ± 0% ~ (p=0.362 n=18+19) FmtFprintfFloat-12 309ns ± 1% 308ns ± 1% -0.41% (p=0.001 n=18+19) FmtManyArgs-12 1.10µs ± 1% 1.08µs ± 0% -1.96% (p=0.000 n=19+18) GobDecode-12 7.81ms ± 1% 7.80ms ± 1% ~ (p=0.425 n=18+19) GobEncode-12 6.53ms ± 1% 6.53ms ± 1% ~ (p=0.817 n=19+19) Gzip-12 312ms ± 1% 312ms ± 2% ~ (p=0.967 n=19+20) Gunzip-12 42.0ms ± 1% 41.9ms ± 1% ~ (p=0.172 n=19+19) HTTPClientServer-12 63.7µs ± 1% 63.8µs ± 1% ~ (p=0.639 n=19+19) JSONEncode-12 16.4ms ± 1% 16.4ms ± 1% ~ (p=0.954 n=19+19) JSONDecode-12 58.5ms ± 1% 57.8ms ± 1% -1.27% (p=0.000 n=18+19) Mandelbrot200-12 3.86ms ± 1% 3.88ms ± 0% +0.44% (p=0.000 n=18+18) GoParse-12 3.67ms ± 2% 3.66ms ± 1% -0.52% (p=0.001 n=18+19) RegexpMatchEasy0_32-12 100ns ± 1% 100ns ± 0% ~ (p=0.257 n=19+18) RegexpMatchEasy0_1K-12 347ns ± 1% 347ns ± 1% ~ (p=0.527 n=18+18) RegexpMatchEasy1_32-12 83.7ns ± 2% 83.1ns ± 2% ~ (p=0.096 n=18+19) RegexpMatchEasy1_1K-12 509ns ± 1% 505ns ± 1% -0.75% (p=0.000 n=18+19) RegexpMatchMedium_32-12 130ns ± 2% 129ns ± 1% ~ (p=0.962 n=20+20) RegexpMatchMedium_1K-12 39.5µs ± 2% 39.4µs ± 1% ~ (p=0.376 n=20+19) RegexpMatchHard_32-12 2.04µs ± 0% 2.04µs ± 1% ~ (p=0.195 n=18+17) RegexpMatchHard_1K-12 61.4µs ± 1% 61.4µs ± 1% ~ (p=0.885 n=19+19) Revcomp-12 540ms ± 2% 542ms ± 4% ~ (p=0.552 n=19+17) Template-12 69.6ms ± 1% 71.2ms ± 1% +2.39% (p=0.000 n=20+20) TimeParse-12 357ns ± 1% 357ns ± 1% ~ (p=0.883 n=18+20) TimeFormat-12 379ns ± 1% 362ns ± 1% -4.53% (p=0.000 n=18+19) [Geo mean] 62.0µs 61.8µs -0.44% name old time/op new time/op delta XBenchGarbage-12 5.89ms ± 2% 5.81ms ± 2% -1.41% (p=0.000 n=19+18) Change-Id: I96b31cca6ae77c30693a891cff3fe663fa2447a0 Reviewed-on: https://go-review.googlesource.com/17748 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
4ad64cadf8
commit
87d939dee8
@ -653,7 +653,6 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
|
||||
}
|
||||
}
|
||||
}
|
||||
c.local_cachealloc += size
|
||||
} else {
|
||||
var s *mspan
|
||||
shouldhelpgc = true
|
||||
|
@ -14,9 +14,8 @@ import "unsafe"
|
||||
type mcache struct {
|
||||
// The following members are accessed on every malloc,
|
||||
// so they are grouped here for better caching.
|
||||
next_sample int32 // trigger heap sample after allocating this many bytes
|
||||
local_cachealloc uintptr // bytes allocated from cache since last lock of heap
|
||||
local_scan uintptr // bytes of scannable heap allocated
|
||||
next_sample int32 // trigger heap sample after allocating this many bytes
|
||||
local_scan uintptr // bytes of scannable heap allocated
|
||||
|
||||
// Allocator cache for tiny objects w/o pointers.
|
||||
// See "Tiny allocator" comment in malloc.go.
|
||||
|
@ -106,6 +106,15 @@ havespan:
|
||||
if usedBytes > 0 {
|
||||
reimburseSweepCredit(usedBytes)
|
||||
}
|
||||
atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
|
||||
if trace.enabled {
|
||||
// heap_live changed.
|
||||
traceHeapAlloc()
|
||||
}
|
||||
if gcBlackenEnabled != 0 {
|
||||
// heap_live changed.
|
||||
gcController.revise()
|
||||
}
|
||||
if s.freelist.ptr() == nil {
|
||||
throw("freelist empty")
|
||||
}
|
||||
@ -128,6 +137,9 @@ func (c *mcentral) uncacheSpan(s *mspan) {
|
||||
if n > 0 {
|
||||
c.empty.remove(s)
|
||||
c.nonempty.insert(s)
|
||||
// mCentral_CacheSpan conservatively counted
|
||||
// unallocated slots in heap_live. Undo this.
|
||||
atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
|
||||
}
|
||||
unlock(&c.lock)
|
||||
}
|
||||
|
@ -1570,6 +1570,11 @@ func gcMark(start_time int64) {
|
||||
// is approximately the amount of heap that was allocated
|
||||
// since marking began).
|
||||
allocatedDuringCycle := memstats.heap_live - work.initialHeapLive
|
||||
if memstats.heap_live < work.initialHeapLive {
|
||||
// This can happen if mCentral_UncacheSpan tightens
|
||||
// the heap_live approximation.
|
||||
allocatedDuringCycle = 0
|
||||
}
|
||||
if work.bytesMarked >= allocatedDuringCycle {
|
||||
memstats.heap_reachable = work.bytesMarked - allocatedDuringCycle
|
||||
} else {
|
||||
@ -1593,7 +1598,9 @@ func gcMark(start_time int64) {
|
||||
throw("next_gc underflow")
|
||||
}
|
||||
|
||||
// Update other GC heap size stats.
|
||||
// Update other GC heap size stats. This must happen after
|
||||
// cachestats (which flushes local statistics to these) and
|
||||
// flushallmcaches (which modifies heap_live).
|
||||
memstats.heap_live = work.bytesMarked
|
||||
memstats.heap_marked = work.bytesMarked
|
||||
memstats.heap_scan = uint64(gcController.scanWork)
|
||||
|
@ -429,8 +429,6 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
|
||||
}
|
||||
|
||||
// transfer stats from cache to global
|
||||
memstats.heap_live += uint64(_g_.m.mcache.local_cachealloc)
|
||||
_g_.m.mcache.local_cachealloc = 0
|
||||
memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
|
||||
_g_.m.mcache.local_scan = 0
|
||||
memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
|
||||
@ -464,7 +462,7 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
|
||||
h.pagesInUse += uint64(npage)
|
||||
if large {
|
||||
memstats.heap_objects++
|
||||
memstats.heap_live += uint64(npage << _PageShift)
|
||||
atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
|
||||
// Swept spans are at the end of lists.
|
||||
if s.npages < uintptr(len(h.free)) {
|
||||
h.busy[s.npages].insertBack(s)
|
||||
@ -713,8 +711,6 @@ func (h *mheap) freeSpan(s *mspan, acct int32) {
|
||||
systemstack(func() {
|
||||
mp := getg().m
|
||||
lock(&h.lock)
|
||||
memstats.heap_live += uint64(mp.mcache.local_cachealloc)
|
||||
mp.mcache.local_cachealloc = 0
|
||||
memstats.heap_scan += uint64(mp.mcache.local_scan)
|
||||
mp.mcache.local_scan = 0
|
||||
memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
|
||||
@ -723,12 +719,10 @@ func (h *mheap) freeSpan(s *mspan, acct int32) {
|
||||
memstats.heap_objects--
|
||||
}
|
||||
if gcBlackenEnabled != 0 {
|
||||
// heap_scan changed.
|
||||
gcController.revise()
|
||||
}
|
||||
h.freeSpanLocked(s, true, true, 0)
|
||||
if trace.enabled {
|
||||
traceHeapAlloc()
|
||||
}
|
||||
unlock(&h.lock)
|
||||
})
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ type mstats struct {
|
||||
|
||||
// Statistics about garbage collector.
|
||||
// Protected by mheap or stopping the world during GC.
|
||||
next_gc uint64 // next gc (in heap_alloc time)
|
||||
next_gc uint64 // next gc (in heap_live time)
|
||||
last_gc uint64 // last gc (in absolute time)
|
||||
pause_total_ns uint64
|
||||
pause_ns [256]uint64 // circular buffer of recent gc pause lengths
|
||||
@ -70,13 +70,33 @@ type mstats struct {
|
||||
|
||||
// heap_live is the number of bytes considered live by the GC.
|
||||
// That is: retained by the most recent GC plus allocated
|
||||
// since then. heap_live <= heap_alloc, since heap_live
|
||||
// excludes unmarked objects that have not yet been swept.
|
||||
// since then. heap_live <= heap_alloc, since heap_alloc
|
||||
// includes unmarked objects that have not yet been swept (and
|
||||
// hence goes up as we allocate and down as we sweep) while
|
||||
// heap_live excludes these objects (and hence only goes up
|
||||
// between GCs).
|
||||
//
|
||||
// This is updated atomically without locking. To reduce
|
||||
// contention, this is updated only when obtaining a span from
|
||||
// an mcentral and at this point it counts all of the
|
||||
// unallocated slots in that span (which will be allocated
|
||||
// before that mcache obtains another span from that
|
||||
// mcentral). Hence, it slightly overestimates the "true" live
|
||||
// heap size. It's better to overestimate than to
|
||||
// underestimate because 1) this triggers the GC earlier than
|
||||
// necessary rather than potentially too late and 2) this
|
||||
// leads to a conservative GC rate rather than a GC rate that
|
||||
// is potentially too low.
|
||||
//
|
||||
// Whenever this is updated, call traceHeapAlloc() and
|
||||
// gcController.revise().
|
||||
heap_live uint64
|
||||
|
||||
// heap_scan is the number of bytes of "scannable" heap. This
|
||||
// is the live heap (as counted by heap_live), but omitting
|
||||
// no-scan objects and no-scan tails of objects.
|
||||
//
|
||||
// Whenever this is updated, call gcController.revise().
|
||||
heap_scan uint64
|
||||
|
||||
// heap_marked is the number of bytes marked by the previous
|
||||
@ -335,11 +355,6 @@ func flushallmcaches() {
|
||||
func purgecachedstats(c *mcache) {
|
||||
// Protected by either heap or GC lock.
|
||||
h := &mheap_
|
||||
memstats.heap_live += uint64(c.local_cachealloc)
|
||||
c.local_cachealloc = 0
|
||||
if trace.enabled {
|
||||
traceHeapAlloc()
|
||||
}
|
||||
memstats.heap_scan += uint64(c.local_scan)
|
||||
c.local_scan = 0
|
||||
memstats.tinyallocs += uint64(c.local_tinyallocs)
|
||||
|
Loading…
Reference in New Issue
Block a user