From 4a4fa38d0e86064c6fec9e0fa9f60cb1782bcaff Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Wed, 24 Aug 2011 11:05:45 +0200 Subject: [PATCH] exp/norm: Reduced the size of the byte buffer used by reorderBuffer by half by reusing space when combining. R=r CC=golang-dev https://golang.org/cl/4939042 --- src/pkg/exp/norm/composition.go | 34 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/pkg/exp/norm/composition.go b/src/pkg/exp/norm/composition.go index 87838999fd..ecaae61ce1 100644 --- a/src/pkg/exp/norm/composition.go +++ b/src/pkg/exp/norm/composition.go @@ -12,16 +12,14 @@ const ( maxNFCExpansion = 3 // NFC(0x1D160) maxNFKCExpansion = 18 // NFKC(0xFDFA) - maxRuneSizeInDecomp = 4 - // Need to multiply by 2 as we don't reuse byte buffer space for recombining. - maxByteBufferSize = 2 * maxRuneSizeInDecomp * maxCombiningChars // 256 + maxByteBufferSize = utf8.UTFMax * maxCombiningChars // 128 ) // reorderBuffer is used to normalize a single segment. Characters inserted with -// insert() are decomposed and reordered based on CCC. The compose() method can +// insert are decomposed and reordered based on CCC. The compose method can // be used to recombine characters. Note that the byte buffer does not hold // the UTF-8 characters in order. Only the rune array is maintained in sorted -// order. flush() writes the resulting segment to a byte array. +// order. flush writes the resulting segment to a byte array. type reorderBuffer struct { rune [maxCombiningChars]runeInfo // Per character info. byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos. @@ -68,7 +66,7 @@ func (rb *reorderBuffer) insertOrdered(info runeInfo) bool { } rb.nrune += 1 pos := uint8(rb.nbyte) - rb.nbyte += info.size + rb.nbyte += utf8.UTFMax info.pos = pos b[n] = info return true @@ -81,17 +79,20 @@ func (rb *reorderBuffer) insert(src []byte, info runeInfo) bool { rune, _ := utf8.DecodeRune(src) return rb.decomposeHangul(uint32(rune)) } - pos := rb.nbyte if info.flags.hasDecomposition() { dcomp := rb.f.decompose(src) - for i := 0; i < len(dcomp); i += int(info.size) { + for i := 0; i < len(dcomp); { info = rb.f.info(dcomp[i:]) + pos := rb.nbyte if !rb.insertOrdered(info) { return false } + end := i + int(info.size) + copy(rb.byte[pos:], dcomp[i:end]) + i = end } - copy(rb.byte[pos:], dcomp) } else { + pos := rb.nbyte if !rb.insertOrdered(info) { return false } @@ -107,21 +108,23 @@ func (rb *reorderBuffer) insertString(src string, info runeInfo) bool { rune, _ := utf8.DecodeRuneInString(src) return rb.decomposeHangul(uint32(rune)) } - pos := rb.nbyte if info.flags.hasDecomposition() { dcomp := rb.f.decomposeString(src) - for i := 0; i < len(dcomp); i += int(info.size) { + for i := 0; i < len(dcomp); { info = rb.f.info(dcomp[i:]) + pos := rb.nbyte if !rb.insertOrdered(info) { return false } + end := i + int(info.size) + copy(rb.byte[pos:], dcomp[i:end]) + i = end } - copy(rb.byte[pos:], dcomp) } else { + copy(rb.byte[rb.nbyte:], src[:info.size]) if !rb.insertOrdered(info) { return false } - copy(rb.byte[pos:], src[:info.size]) } return true } @@ -130,17 +133,16 @@ func (rb *reorderBuffer) insertString(src string, info runeInfo) bool { func (rb *reorderBuffer) appendRune(rune uint32) { bn := rb.nbyte sz := utf8.EncodeRune(rb.byte[bn:], int(rune)) - rb.nbyte += uint8(sz) + rb.nbyte += utf8.UTFMax rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0} rb.nrune++ } // assignRune sets a rune at position pos. It is used for Hangul and recomposition. func (rb *reorderBuffer) assignRune(pos int, rune uint32) { - bn := rb.nbyte + bn := rb.rune[pos].pos sz := utf8.EncodeRune(rb.byte[bn:], int(rune)) rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0} - rb.nbyte += uint8(sz) } // runeAt returns the rune at position n. It is used for Hangul and recomposition.