sync: improve RWMutex performance

The new implementation features wait-free
fast path for readers which significantly improves
performance/scalability on read-mostly workloads.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                              old ns/op    new ns/op    delta
BenchmarkRWMutexUncontended               179.00        96.60  -46.03%
BenchmarkRWMutexUncontended-2              89.10        49.10  -44.89%
BenchmarkRWMutexUncontended-4              44.70        24.70  -44.74%
BenchmarkRWMutexUncontended-8              23.30        12.90  -44.64%
BenchmarkRWMutexUncontended-16             16.80         8.75  -47.92%
BenchmarkRWMutexWrite100                   79.60        26.80  -66.33%
BenchmarkRWMutexWrite100-2                305.00        33.00  -89.18%
BenchmarkRWMutexWrite100-4                245.00       113.00  -53.88%
BenchmarkRWMutexWrite100-8                330.00       147.00  -55.45%
BenchmarkRWMutexWrite100-16               371.00       152.00  -59.03%
BenchmarkRWMutexWrite10                    78.30        29.80  -61.94%
BenchmarkRWMutexWrite10-2                 348.00       165.00  -52.59%
BenchmarkRWMutexWrite10-4                 447.00       199.00  -55.48%
BenchmarkRWMutexWrite10-8                 564.00       180.00  -68.09%
BenchmarkRWMutexWrite10-16                492.00       192.00  -60.98%
BenchmarkRWMutexWorkWrite100             1077.00      1037.00   -3.71%
BenchmarkRWMutexWorkWrite100-2            659.00       596.00   -9.56%
BenchmarkRWMutexWorkWrite100-4            509.00       361.00  -29.08%
BenchmarkRWMutexWorkWrite100-8            603.00       351.00  -41.79%
BenchmarkRWMutexWorkWrite100-16           750.00       607.00  -19.07%
BenchmarkRWMutexWorkWrite10               990.00       951.00   -3.94%
BenchmarkRWMutexWorkWrite10-2            1119.00      1070.00   -4.38%
BenchmarkRWMutexWorkWrite10-4            1300.00      1199.00   -7.77%
BenchmarkRWMutexWorkWrite10-8            1424.00      1291.00   -9.34%
BenchmarkRWMutexWorkWrite10-16           1981.00      1786.00   -9.84%

R=rsc
CC=golang-dev
https://golang.org/cl/4671051
This commit is contained in:
Dmitriy Vyukov 2011-07-12 09:24:21 -07:00 committed by Russ Cox
parent 4c63129545
commit daaf29cf93
2 changed files with 118 additions and 30 deletions

View File

@ -4,7 +4,10 @@
package sync
import "sync/atomic"
import (
"runtime"
"sync/atomic"
)
// An RWMutex is a reader/writer mutual exclusion lock.
// The lock can be held by an arbitrary number of readers
@ -12,35 +15,22 @@ import "sync/atomic"
// RWMutexes can be created as part of other
// structures; the zero value for a RWMutex is
// an unlocked mutex.
//
// Writers take priority over Readers: no new RLocks
// are granted while a blocked Lock call is waiting.
type RWMutex struct {
w Mutex // held if there are pending readers or writers
r Mutex // held if the w is being rd
readerCount int32 // number of pending readers
w Mutex // held if there are pending writers
writerSem uint32 // semaphore for writers to wait for completing readers
readerSem uint32 // semaphore for readers to wait for completing writers
readerCount int32 // number of pending readers
readerWait int32 // number of departing readers
}
const rwmutexMaxReaders = 1 << 30
// RLock locks rw for reading.
// If the lock is already locked for writing or there is a writer already waiting
// to release the lock, RLock blocks until the writer has released the lock.
func (rw *RWMutex) RLock() {
// Use rw.r.Lock() to block granting the RLock if a goroutine
// is waiting for its Lock. This is the prevent starvation of W in
// this situation:
// A: rw.RLock() // granted
// W: rw.Lock() // waiting for rw.w().Lock()
// B: rw.RLock() // granted
// C: rw.RLock() // granted
// B: rw.RUnlock()
// ... (new readers come and go indefinitely, W is starving)
rw.r.Lock()
if atomic.AddInt32(&rw.readerCount, 1) == 1 {
// The first reader locks rw.w, so writers will be blocked
// while the readers have the RLock.
rw.w.Lock()
if atomic.AddInt32(&rw.readerCount, 1) < 0 {
// A writer is pending, wait for it.
runtime.Semacquire(&rw.readerSem)
}
rw.r.Unlock()
}
// RUnlock undoes a single RLock call;
@ -48,9 +38,12 @@ func (rw *RWMutex) RLock() {
// It is a run-time error if rw is not locked for reading
// on entry to RUnlock.
func (rw *RWMutex) RUnlock() {
if atomic.AddInt32(&rw.readerCount, -1) == 0 {
// last reader finished, enable writers
rw.w.Unlock()
if atomic.AddInt32(&rw.readerCount, -1) < 0 {
// A writer is pending.
if atomic.AddInt32(&rw.readerWait, -1) == 0 {
// The last reader unblocks the writer.
runtime.Semrelease(&rw.writerSem)
}
}
}
@ -61,9 +54,14 @@ func (rw *RWMutex) RUnlock() {
// a blocked Lock call excludes new readers from acquiring
// the lock.
func (rw *RWMutex) Lock() {
rw.r.Lock()
// First, resolve competition with other writers.
rw.w.Lock()
rw.r.Unlock()
// Announce to readers there is a pending writer.
r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders
// Wait for active readers.
if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 {
runtime.Semacquire(&rw.writerSem)
}
}
// Unlock unlocks rw for writing. It is a run-time error if rw is
@ -72,7 +70,16 @@ func (rw *RWMutex) Lock() {
// As with Mutexes, a locked RWMutex is not associated with a particular
// goroutine. One goroutine may RLock (Lock) an RWMutex and then
// arrange for another goroutine to RUnlock (Unlock) it.
func (rw *RWMutex) Unlock() { rw.w.Unlock() }
func (rw *RWMutex) Unlock() {
// Announce to readers there is no active writer.
r := atomic.AddInt32(&rw.readerCount, rwmutexMaxReaders)
// Unblock blocked readers, if any.
for i := 0; i < int(r); i++ {
runtime.Semrelease(&rw.readerSem)
}
// Allow other writers to proceed.
rw.w.Unlock()
}
// RLocker returns a Locker interface that implements
// the Lock and Unlock methods by calling rw.RLock and rw.RUnlock.

View File

@ -154,3 +154,84 @@ func TestRLocker(t *testing.T) {
wl.Unlock()
}
}
func BenchmarkRWMutexUncontended(b *testing.B) {
type PaddedRWMutex struct {
RWMutex
pad [32]uint32
}
const CallsPerSched = 1000
procs := runtime.GOMAXPROCS(-1)
N := int32(b.N / CallsPerSched)
c := make(chan bool, procs)
for p := 0; p < procs; p++ {
go func() {
var rwm PaddedRWMutex
for atomic.AddInt32(&N, -1) >= 0 {
runtime.Gosched()
for g := 0; g < CallsPerSched; g++ {
rwm.RLock()
rwm.RLock()
rwm.RUnlock()
rwm.RUnlock()
rwm.Lock()
rwm.Unlock()
}
}
c <- true
}()
}
for p := 0; p < procs; p++ {
<-c
}
}
func benchmarkRWMutex(b *testing.B, localWork, writeRatio int) {
const CallsPerSched = 1000
procs := runtime.GOMAXPROCS(-1)
N := int32(b.N / CallsPerSched)
c := make(chan bool, procs)
var rwm RWMutex
for p := 0; p < procs; p++ {
go func() {
foo := 0
for atomic.AddInt32(&N, -1) >= 0 {
runtime.Gosched()
for g := 0; g < CallsPerSched; g++ {
foo++
if foo%writeRatio == 0 {
rwm.Lock()
rwm.Unlock()
} else {
rwm.RLock()
for i := 0; i != localWork; i += 1 {
foo *= 2
foo /= 2
}
rwm.RUnlock()
}
}
}
c <- foo == 42
}()
}
for p := 0; p < procs; p++ {
<-c
}
}
func BenchmarkRWMutexWrite100(b *testing.B) {
benchmarkRWMutex(b, 0, 100)
}
func BenchmarkRWMutexWrite10(b *testing.B) {
benchmarkRWMutex(b, 0, 10)
}
func BenchmarkRWMutexWorkWrite100(b *testing.B) {
benchmarkRWMutex(b, 100, 100)
}
func BenchmarkRWMutexWorkWrite10(b *testing.B) {
benchmarkRWMutex(b, 100, 10)
}