[dev.ssa] cmd/compile: better register allocation

Use a more precise computation of next use. It properly detects lifetime holes and deallocates values during those holes. It also uses a more precise version of distance to next use which affects which values get spilled. Change-Id: I49eb3ebe2d2cb64842ecdaa7fb4f3792f8afb90b Reviewed-on: https://go-review.googlesource.com/16760 Run-TryBot: Keith Randall <khr@golang.org> Reviewed-by: David Chase <drchase@google.com>
2024-09-22 02:48:50 +00:00 · 2015-11-05 14:59:47 -08:00 · 2015-11-05 14:59:47 -08:00 · 75102afce7
commit 75102afce7
parent 7807bda91d
2 changed files with 318 additions and 182 deletions
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@ -202,41 +202,43 @@ func pickReg(r regMask) register {
 	}
 }

-// A use is a record of a position (2*pc for value uses, odd numbers for other uses)
-// and a value ID that is used at that position.
 type use struct {
-	idx int32
-	vid ID
+	dist int32 // distance from start of the block to a use of a value
+	next *use  // linked list of uses of a value in nondecreasing dist order
 }

 type valState struct {
 	regs       regMask // the set of registers holding a Value (usually just one)
-	uses       []int32 // sorted list of places where Value is used
-	usestorage [2]int32
-	spill      *Value // spilled copy of the Value
-	spill2     *Value // special alternate spill location used for phi resolution
+	uses       *use    // list of uses in this block
+	spill      *Value  // spilled copy of the Value
+	spill2     *Value  // special alternate spill location used for phi resolution
 	spillUsed  bool
 	spill2used bool
 }

 type regState struct {
 	v *Value // Original (preregalloc) Value stored in this register.
-	c *Value // A Value equal to v which is currently in register.  Might be v or a copy of it.
+	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
 	// If a register is unused, v==c==nil
 }

 type regAllocState struct {
 	f *Func

+	// For each value, whether it needs a register or not.
+	// Cached value of !v.Type.IsMemory() && !v.Type.IsVoid().
+	needReg []bool
+
 	// for each block, its primary predecessor.
 	// A predecessor of b is primary if it is the closest
 	// predecessor that appears before b in the layout order.
 	// We record the index in the Preds list where the primary predecessor sits.
 	primary []int32

-	// live values on each edge.  live[b.ID][idx] is a list of value IDs
-	// which are live on b's idx'th successor edge.
-	live [][][]ID
+	// live values at the end of each block.  live[b.ID] is a list of value IDs
+	// which are live at the end of b, together with a count of how many instructions
+	// forward to the next use.
+	live [][]liveInfo

 	// current state of each (preregalloc) Value
 	values []valState
@ -254,14 +256,14 @@ type regAllocState struct {
 	// mask of registers currently in use
 	used regMask

-	// An ordered list (by idx) of all uses in the function
-	uses []use
-
 	// Home locations (registers) for Values
 	home []Location

 	// current block we're working on
 	curBlock *Block
+
+	// cache of use records
+	freeUseRecords *use
 }

 // freeReg frees up register r.  Any current user of r is kicked out.
@ -350,18 +352,25 @@ func (s *regAllocState) allocReg(mask regMask) register {
 	// farthest-in-the-future use.
 	// TODO: Prefer registers with already spilled Values?
 	// TODO: Modify preference using affinity graph.
+	// TODO: if a single value is in multiple registers, spill one of them
+	// before spilling a value in just a single register.

 	// SP and SB are allocated specially.  No regular value should
 	// be allocated to them.
 	mask &^= 1<<4 | 1<<32

+	// Find a register to spill.  We spill the register containing the value
+	// whose next use is as far in the future as possible.
+	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
 	maxuse := int32(-1)
 	for t := register(0); t < numRegs; t++ {
 		if mask>>t&1 == 0 {
 			continue
 		}
 		v := s.regs[t].v
-		if len(s.values[v.ID].uses) == 0 {
+
+		if s.values[v.ID].uses == nil {
+			// No subsequent use.
 			// This can happen when fixing up merge blocks at the end.
 			// We've already run through the use lists so they are empty.
 			// Any register would be ok at this point.
@ -369,7 +378,9 @@ func (s *regAllocState) allocReg(mask regMask) register {
 			maxuse = 0
 			break
 		}
-		if n := s.values[v.ID].uses[0]; n > maxuse {
+		if n := s.values[v.ID].uses.dist; n > maxuse {
+			// v's next use is farther in the future than any value
+			// we've seen so far.  A new best spill candidate.
 			r = t
 			maxuse = n
 		}
@ -402,7 +413,12 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool) *Val
 		return s.regs[r].c
 	}

-	mask &^= 1<<4 | 1<<32 // don't spill SP or SB
+	if v.Op != OpSP {
+		mask &^= 1 << 4 // dont' spill SP
+	}
+	if v.Op != OpSB {
+		mask &^= 1 << 32 // don't spill SB
+	}
 	mask &^= s.reserved()

 	// Allocate a register.
@ -484,18 +500,20 @@ func (s *regAllocState) init(f *Func) {
 	}

 	s.f = f
+	s.needReg = make([]bool, f.NumValues())
 	s.regs = make([]regState, numRegs)
 	s.values = make([]valState, f.NumValues())
-	for i := range s.values {
-		s.values[i].uses = s.values[i].usestorage[:0]
-	}
 	s.orig = make([]*Value, f.NumValues())
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
+			if v.Type.IsMemory() || v.Type.IsVoid() {
+				continue
+			}
+			s.needReg[v.ID] = true
 			s.orig[v.ID] = v
 		}
 	}
-	s.live = f.live()
+	s.computeLive()

 	// Compute block order.  This array allows us to distinguish forward edges
 	// from backward edges and compute how far they go.
@ -518,63 +536,41 @@ func (s *regAllocState) init(f *Func) {
 		}
 		s.primary[b.ID] = int32(best)
 	}
+}

-	// Compute uses.  We assign a PC to each Value in the program, in f.Blocks
-	// and then b.Values order.  Uses are recorded using this numbering.
-	// Uses by Values are recorded as 2*PC.  Special uses (block control values,
-	// pseudo-uses for backedges) are recorded as 2*(last PC in block)+1.
-	var pc int32
-	for _, b := range f.Blocks {
-		// uses in regular Values
-		for _, v := range b.Values {
-			for _, a := range v.Args {
-				s.values[a.ID].uses = append(s.values[a.ID].uses, pc*2)
-				s.uses = append(s.uses, use{pc * 2, a.ID})
-			}
-			pc++
-		}
-		// use as a block control value
-		endIdx := pc*2 - 1
-		if b.Control != nil {
-			s.values[b.Control.ID].uses = append(s.values[b.Control.ID].uses, endIdx)
-			s.uses = append(s.uses, use{endIdx, b.Control.ID})
-		}
-		// uses by backedges
-		// Backedges are treated as uses so that the uses span the entire live
-		// range of the value.
-		for i, c := range b.Succs {
-			if blockOrder[c.ID] > blockOrder[b.ID] {
-				continue // forward edge
-			}
-			for _, vid := range s.live[b.ID][i] {
-				s.values[vid].uses = append(s.values[vid].uses, endIdx)
-				s.uses = append(s.uses, use{endIdx, vid})
-			}
-		}
+// Adds a use record for id at distance dist from the start of the block.
+// All calls to addUse must happen with nonincreasing dist.
+func (s *regAllocState) addUse(id ID, dist int32) {
+	r := s.freeUseRecords
+	if r != nil {
+		s.freeUseRecords = r.next
+	} else {
+		r = &use{}
 	}
-	if pc*2 < 0 {
-		f.Fatalf("pc too large: function too big")
+	r.dist = dist
+	r.next = s.values[id].uses
+	s.values[id].uses = r
+	if r.next != nil && dist > r.next.dist {
+		s.f.Fatalf("uses added in wrong order")
 	}
 }

-// clearUses drops any uses <= useIdx.  Any values which have no future
-// uses are dropped from registers.
-func (s *regAllocState) clearUses(useIdx int32) {
-	for len(s.uses) > 0 && s.uses[0].idx <= useIdx {
-		idx := s.uses[0].idx
-		vid := s.uses[0].vid
-		s.uses = s.uses[1:]
-
-		vi := &s.values[vid]
-		if vi.uses[0] != idx {
-			s.f.Fatalf("use mismatch for v%d\n", vid)
-		}
-		vi.uses = vi.uses[1:]
-		if len(vi.uses) != 0 {
+// advanceUses advances the uses of v's args from the state before v to the state after v.
+// Any values which have no more uses are deallocated from registers.
+func (s *regAllocState) advanceUses(v *Value) {
+	for _, a := range v.Args {
+		if !s.needReg[a.ID] {
 			continue
 		}
-		// Value is dead, free all registers that hold it (except SP & SB).
-		s.freeRegs(vi.regs &^ (1<<4 | 1<<32))
+		ai := &s.values[a.ID]
+		r := ai.uses
+		ai.uses = r.next
+		if r.next == nil {
+			// Value is dead, free all registers that hold it.
+			s.freeRegs(ai.regs)
+		}
+		r.next = s.freeUseRecords
+		s.freeUseRecords = r
 	}
 }

@ -601,28 +597,69 @@ func (s *regAllocState) compatRegs(v *Value) regMask {
 }

 func (s *regAllocState) regalloc(f *Func) {
-	liveset := newSparseSet(f.NumValues())
+	liveSet := newSparseSet(f.NumValues())
 	argset := newSparseSet(f.NumValues())
 	var oldSched []*Value
 	var phis []*Value
 	var stackPhis []*Value
 	var regPhis []*Value
+	var phiRegs []register
+	var args []*Value

 	if f.Entry != f.Blocks[0] {
 		f.Fatalf("entry block must be first")
 	}

-	var phiRegs []register
-
 	// For each merge block, we record the starting register state (after phi ops)
 	// for that merge block.  Indexed by blockid/regnum.
 	startRegs := make([][]*Value, f.NumBlocks())
 	// end state of registers for each block, idexed by blockid/regnum.
 	endRegs := make([][]regState, f.NumBlocks())
-	var pc int32
 	for _, b := range f.Blocks {
 		s.curBlock = b

+		// Initialize liveSet and uses fields for this block.
+		// Walk backwards through the block doing liveness analysis.
+		liveSet.clear()
+		for _, e := range s.live[b.ID] {
+			s.addUse(e.ID, int32(len(b.Values))+e.dist) // pseudo-uses from beyond end of block
+			liveSet.add(e.ID)
+		}
+		if c := b.Control; c != nil && s.needReg[c.ID] {
+			s.addUse(c.ID, int32(len(b.Values))) // psuedo-use by control value
+			liveSet.add(c.ID)
+		}
+		for i := len(b.Values) - 1; i >= 0; i-- {
+			v := b.Values[i]
+			if v.Op == OpPhi {
+				break // Don't process phi ops.
+			}
+			liveSet.remove(v.ID)
+			for _, a := range v.Args {
+				if !s.needReg[a.ID] {
+					continue
+				}
+				s.addUse(a.ID, int32(i))
+				liveSet.add(a.ID)
+			}
+		}
+		if regDebug {
+			fmt.Printf("uses for %s:%s\n", s.f.Name, b)
+			for i := range s.values {
+				vi := &s.values[i]
+				u := vi.uses
+				if u == nil {
+					continue
+				}
+				fmt.Printf("v%d:", i)
+				for u != nil {
+					fmt.Printf(" %d", u.dist)
+					u = u.next
+				}
+				fmt.Println()
+			}
+		}
+
 		// Make a copy of the block schedule so we can generate a new one in place.
 		// We make a separate copy for phis and regular values.
 		nphi := 0
@ -648,6 +685,15 @@ func (s *regAllocState) regalloc(f *Func) {
 			if nphi > 0 {
 				f.Fatalf("phis in single-predecessor block")
 			}
+			// Drop any values which are no longer live.
+			// This may happen because at the end of p, a value may be
+			// live but only used by some other successor of p.
+			for r := register(0); r < numRegs; r++ {
+				v := s.regs[r].v
+				if v != nil && !liveSet.contains(v.ID) {
+					s.freeReg(r)
+				}
+			}
 		} else {
 			// This is the complicated case.  We have more than one predecessor,
 			// which means we may have Phi ops.
@ -663,25 +709,6 @@ func (s *regAllocState) regalloc(f *Func) {
 			p := b.Preds[idx]
 			s.setState(endRegs[p.ID])

-			// Drop anything not live on the c->b edge.
-			var idx2 int
-			for idx2 = 0; idx2 < len(p.Succs); idx2++ {
-				if p.Succs[idx2] == b {
-					break
-				}
-			}
-			liveset.clear()
-			liveset.addAll(s.live[p.ID][idx2])
-			for r := register(0); r < numRegs; r++ {
-				v := s.regs[r].v
-				if v == nil {
-					continue
-				}
-				if !liveset.contains(v.ID) {
-					s.freeReg(r)
-				}
-			}
-
 			// Decide on registers for phi ops.  Use the registers determined
 			// by the primary predecessor if we can.
 			// TODO: pick best of (already processed) predecessors?
@ -742,21 +769,20 @@ func (s *regAllocState) regalloc(f *Func) {
 		}

 		// Process all the non-phi values.
-		pc += int32(nphi)
-		for _, v := range oldSched {
+		for idx, v := range oldSched {
 			if v.Op == OpPhi {
 				f.Fatalf("phi %s not at start of block", v)
 			}
 			if v.Op == OpSP {
 				s.assignReg(4, v, v) // TODO: arch-dependent
 				b.Values = append(b.Values, v)
-				pc++
+				s.advanceUses(v)
 				continue
 			}
 			if v.Op == OpSB {
 				s.assignReg(32, v, v) // TODO: arch-dependent
 				b.Values = append(b.Values, v)
-				pc++
+				s.advanceUses(v)
 				continue
 			}
 			if v.Op == OpArg {
@ -766,19 +792,17 @@ func (s *regAllocState) regalloc(f *Func) {
 				s.values[v.ID].spill = v
 				s.values[v.ID].spillUsed = true // use is guaranteed
 				b.Values = append(b.Values, v)
-				pc++
+				s.advanceUses(v)
 				continue
 			}
-			s.clearUses(pc*2 - 1)
 			regspec := opcodeTable[v.Op].reg
 			if regDebug {
-				fmt.Printf("%d: working on %s %s %v\n", pc, v, v.LongString(), regspec)
+				fmt.Printf("%d: working on %s %s %v\n", idx, v, v.LongString(), regspec)
 			}
 			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
 				// No register allocation required (or none specified yet)
 				s.freeRegs(regspec.clobbers)
 				b.Values = append(b.Values, v)
-				pc++
 				continue
 			}

@ -786,22 +810,23 @@ func (s *regAllocState) regalloc(f *Func) {
 				// Value is rematerializeable, don't issue it here.
 				// It will get issued just before each use (see
 				// allocValueToReg).
-				pc++
+				s.advanceUses(v)
 				continue
 			}

-			// Move arguments to registers
+			// Move arguments to registers.  Process in an ordering defined
+			// by the register specification (most constrained first).
+			args = append(args[:0], v.Args...)
 			for _, i := range regspec.inputs {
-				a := v.Args[i.idx]
-				v.Args[i.idx] = s.allocValToReg(a, i.regs, true)
+				args[i.idx] = s.allocValToReg(v.Args[i.idx], i.regs, true)
 			}

 			// Now that all args are in regs, we're ready to issue the value itself.
-			// Before we pick a register for the value, allow input registers
+			// Before we pick a register for the output value, allow input registers
 			// to be deallocated. We do this here so that the output can use the
 			// same register as a dying input.
 			s.nospill = 0
-			s.clearUses(pc * 2)
+			s.advanceUses(v) // frees any registers holding args that are no longer live

 			// Dump any registers which will be clobbered
 			s.freeRegs(regspec.clobbers)
@ -818,34 +843,60 @@ func (s *regAllocState) regalloc(f *Func) {
 			}

 			// Issue the Value itself.
+			for i, a := range args {
+				v.Args[i] = a // use register version of arguments
+			}
 			b.Values = append(b.Values, v)

 			// Issue a spill for this value.  We issue spills unconditionally,
 			// then at the end of regalloc delete the ones we never use.
+			// TODO: schedule the spill at a point that dominates all restores.
+			// The restore may be off in an unlikely branch somewhere and it
+			// would be better to have the spill in that unlikely branch as well.
+			// v := ...
+			// if unlikely {
+			//     f()
+			// }
+			// It would be good to have both spill and restore inside the IF.
 			if !v.Type.IsFlags() {
 				spill := b.NewValue1(v.Line, OpStoreReg, v.Type, v)
 				s.setOrig(spill, v)
 				s.values[v.ID].spill = spill
 				s.values[v.ID].spillUsed = false
 			}
-
-			// Increment pc for next Value.
-			pc++
 		}

-		// Load control value into reg
-		if b.Control != nil && !b.Control.Type.IsMemory() && !b.Control.Type.IsVoid() {
+		if c := b.Control; c != nil && s.needReg[c.ID] {
+			// Load control value into reg.
 			// TODO: regspec for block control values, instead of using
 			// register set from the control op's output.
-			s.allocValToReg(b.Control, opcodeTable[b.Control.Op].reg.outputs[0], false)
+			s.allocValToReg(c, opcodeTable[c.Op].reg.outputs[0], false)
+			// Remove this use from the uses list.
+			u := s.values[c.ID].uses
+			s.values[c.ID].uses = u.next
+			u.next = s.freeUseRecords
+			s.freeUseRecords = u
 		}

 		// Record endRegs
 		endRegs[b.ID] = make([]regState, numRegs)
 		copy(endRegs[b.ID], s.regs)

-		// Allow control Values and Values live only on backedges to be dropped.
-		s.clearUses(pc*2 - 1)
+		// Clear any final uses.
+		// All that is left should be the pseudo-uses added for values which
+		// are live at the end of b.
+		for _, e := range s.live[b.ID] {
+			u := s.values[e.ID].uses
+			if u == nil {
+				f.Fatalf("live at end, no uses v%d", e.ID)
+			}
+			if u.next != nil {
+				f.Fatalf("live at end, too many uses v%d", e.ID)
+			}
+			s.values[e.ID].uses = nil
+			u.next = s.freeUseRecords
+			s.freeUseRecords = u
+		}
 	}

 	// Process merge block input edges.  They are the tricky ones.
@ -1034,20 +1085,24 @@ func (v *Value) rematerializeable() bool {
 	return false
 }

-// live returns a map from block ID and successor edge index to a list
-// of value IDs live on that edge.
+type liveInfo struct {
+	ID   ID    // ID of variable
+	dist int32 // # of instructions before next use
+}
+
+// computeLive computes a map from block ID to a list of value IDs live at the end
+// of that block.  Together with the value ID is a count of how many instructions
+// to the next use of that value.  The resulting map is stored at s.live.
 // TODO: this could be quadratic if lots of variables are live across lots of
 // basic blocks.  Figure out a way to make this function (or, more precisely, the user
 // of this function) require only linear size & time.
-func (f *Func) live() [][][]ID {
-	live := make([][][]ID, f.NumBlocks())
-	for _, b := range f.Blocks {
-		live[b.ID] = make([][]ID, len(b.Succs))
-	}
+func (s *regAllocState) computeLive() {
+	f := s.f
+	s.live = make([][]liveInfo, f.NumBlocks())
 	var phis []*Value

-	s := newSparseSet(f.NumValues())
-	t := newSparseSet(f.NumValues())
+	live := newSparseMap(f.NumValues())
+	t := newSparseMap(f.NumValues())

 	// Instead of iterating over f.Blocks, iterate over their postordering.
 	// Liveness information flows backward, so starting at the end
@ -1061,20 +1116,22 @@ func (f *Func) live() [][][]ID {
 	po := postorder(f)
 	for {
 		for _, b := range po {
-			f.Logf("live %s %v\n", b, live[b.ID])
+			f.Logf("live %s %v\n", b, s.live[b.ID])
 		}
 		changed := false

 		for _, b := range po {
-			// Start with known live values at the end of the block
-			s.clear()
-			for i := 0; i < len(b.Succs); i++ {
-				s.addAll(live[b.ID][i])
+			// Start with known live values at the end of the block.
+			// Add len(b.Values) to adjust from end-of-block distance
+			// to beginning-of-block distance.
+			live.clear()
+			for _, e := range s.live[b.ID] {
+				live.set(e.ID, e.dist+int32(len(b.Values)))
 			}

 			// Mark control value as live
-			if b.Control != nil {
-				s.add(b.Control.ID)
+			if b.Control != nil && s.needReg[b.Control.ID] {
+				live.set(b.Control.ID, int32(len(b.Values)))
 			}

 			// Propagate backwards to the start of the block
@ -1082,36 +1139,75 @@ func (f *Func) live() [][][]ID {
 			phis := phis[:0]
 			for i := len(b.Values) - 1; i >= 0; i-- {
 				v := b.Values[i]
-				s.remove(v.ID)
+				live.remove(v.ID)
 				if v.Op == OpPhi {
 					// save phi ops for later
 					phis = append(phis, v)
 					continue
 				}
-				s.addAllValues(v.Args)
-			}
-
-			// for each predecessor of b, expand its list of live-at-end values
-			// invariant: s contains the values live at the start of b (excluding phi inputs)
-			for i, p := range b.Preds {
-				// Find index of b in p's successors.
-				var j int
-				for j = 0; j < len(p.Succs); j++ {
-					if p.Succs[j] == b {
-						break
+				for _, a := range v.Args {
+					if s.needReg[a.ID] {
+						live.set(a.ID, int32(i))
 					}
 				}
-				t.clear()
-				t.addAll(live[p.ID][j])
-				t.addAll(s.contents())
-				for _, v := range phis {
-					t.add(v.Args[i].ID)
+			}
+
+			// For each predecessor of b, expand its list of live-at-end values.
+			// invariant: live contains the values live at the start of b (excluding phi inputs)
+			for i, p := range b.Preds {
+				// Compute additional distance for the edge.
+				const normalEdge = 10
+				const likelyEdge = 1
+				const unlikelyEdge = 100
+				// Note: delta must be at least 1 to distinguish the control
+				// value use from the first user in a successor block.
+				delta := int32(normalEdge)
+				if len(p.Succs) == 2 {
+					if p.Succs[0] == b && p.Likely == BranchLikely ||
+						p.Succs[1] == b && p.Likely == BranchUnlikely {
+						delta = likelyEdge
+					}
+					if p.Succs[0] == b && p.Likely == BranchUnlikely ||
+						p.Succs[1] == b && p.Likely == BranchLikely {
+						delta = unlikelyEdge
+					}
 				}
-				if t.size() == len(live[p.ID][j]) {
+
+				// Start t off with the previously known live values at the end of p.
+				t.clear()
+				for _, e := range s.live[p.ID] {
+					t.set(e.ID, e.dist)
+				}
+				update := false
+
+				// Add new live values from scanning this block.
+				for _, e := range live.contents() {
+					d := e.val + delta
+					if !t.contains(e.key) || d < t.get(e.key) {
+						update = true
+						t.set(e.key, d)
+					}
+				}
+				// Also add the correct arg from the saved phi values.
+				// All phis are at distance delta (we consider them
+				// simultaneously happening at the start of the block).
+				for _, v := range phis {
+					id := v.Args[i].ID
+					if s.needReg[id] && !t.contains(id) || delta < t.get(id) {
+						update = true
+						t.set(id, delta)
+					}
+				}
+
+				if !update {
 					continue
 				}
-				// grow p's live set
-				live[p.ID][j] = append(live[p.ID][j][:0], t.contents()...)
+				// The live set has changed, update it.
+				l := s.live[p.ID][:0]
+				for _, e := range t.contents() {
+					l = append(l, liveInfo{e.key, e.val})
+				}
+				s.live[p.ID] = l
 				changed = true
 			}
 		}
@ -1120,35 +1216,6 @@ func (f *Func) live() [][][]ID {
 			break
 		}
 	}
-
-	// Make sure that there is only one live memory variable in each set.
-	// Ideally we should check this at every instructiom, but at every
-	// edge seems good enough for now.
-	isMem := make([]bool, f.NumValues())
-	for _, b := range f.Blocks {
-		for _, v := range b.Values {
-			isMem[v.ID] = v.Type.IsMemory()
-		}
-	}
-	for _, b := range f.Blocks {
-		for i, c := range b.Succs {
-			nmem := 0
-			for _, id := range live[b.ID][i] {
-				if isMem[id] {
-					nmem++
-				}
-			}
-			if nmem > 1 {
-				f.Fatalf("more than one mem live on edge %v->%v: %v", b, c, live[b.ID][i])
-			}
-			// TODO: figure out why we get nmem==0 occasionally.
-			//if nmem == 0 {
-			//	f.Fatalf("no mem live on edge %v->%v: %v", b, c, live[b.ID][i])
-			//}
-		}
-	}
-
-	return live
 }

 // reserved returns a mask of reserved registers.
--- a/src/cmd/compile/internal/ssa/sparsemap.go
+++ b/src/cmd/compile/internal/ssa/sparsemap.go
@ -0,0 +1,69 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// from http://research.swtch.com/sparse
+// in turn, from Briggs and Torczon
+
+type sparseEntry struct {
+	key ID
+	val int32
+}
+
+type sparseMap struct {
+	dense  []sparseEntry
+	sparse []int
+}
+
+// newSparseMap returns a sparseMap that can map
+// integers between 0 and n-1 to int32s.
+func newSparseMap(n int) *sparseMap {
+	return &sparseMap{nil, make([]int, n)}
+}
+
+func (s *sparseMap) size() int {
+	return len(s.dense)
+}
+
+func (s *sparseMap) contains(k ID) bool {
+	i := s.sparse[k]
+	return i < len(s.dense) && s.dense[i].key == k
+}
+
+func (s *sparseMap) get(k ID) int32 {
+	i := s.sparse[k]
+	if i < len(s.dense) && s.dense[i].key == k {
+		return s.dense[i].val
+	}
+	return -1
+}
+
+func (s *sparseMap) set(k ID, v int32) {
+	i := s.sparse[k]
+	if i < len(s.dense) && s.dense[i].key == k {
+		s.dense[i].val = v
+		return
+	}
+	s.dense = append(s.dense, sparseEntry{k, v})
+	s.sparse[k] = len(s.dense) - 1
+}
+
+func (s *sparseMap) remove(k ID) {
+	i := s.sparse[k]
+	if i < len(s.dense) && s.dense[i].key == k {
+		y := s.dense[len(s.dense)-1]
+		s.dense[i] = y
+		s.sparse[y.key] = i
+		s.dense = s.dense[:len(s.dense)-1]
+	}
+}
+
+func (s *sparseMap) clear() {
+	s.dense = s.dense[:0]
+}
+
+func (s *sparseMap) contents() []sparseEntry {
+	return s.dense
+}