compress/flate: optimize history-copy decoding.

The forwardCopy function could be re-written in asm, and the copyHuff
method could probably be rolled into huffmanBlock and copyHist, but
I'm leaving those changes for future CLs.

compress/flate benchmarks:
benchmark                                 old ns/op    new ns/op    delta
BenchmarkDecoderBestSpeed1K                  385327       435140  +12.93%
BenchmarkDecoderBestSpeed10K                1245190      1062112  -14.70%
BenchmarkDecoderBestSpeed100K               8512365      5833680  -31.47%
BenchmarkDecoderDefaultCompression1K         382225       421301  +10.22%
BenchmarkDecoderDefaultCompression10K        867950       613890  -29.27%
BenchmarkDecoderDefaultCompression100K      5658240      2466726  -56.40%
BenchmarkDecoderBestCompression1K            383760       421634   +9.87%
BenchmarkDecoderBestCompression10K           867743       614671  -29.16%
BenchmarkDecoderBestCompression100K         5660160      2464996  -56.45%

image/png benchmarks:
benchmark                       old ns/op    new ns/op    delta
BenchmarkDecodeGray               2540834      2389624   -5.95%
BenchmarkDecodeNRGBAGradient     10052700      9534565   -5.15%
BenchmarkDecodeNRGBAOpaque        8704710      8163430   -6.22%
BenchmarkDecodePaletted           1458779      1325017   -9.17%
BenchmarkDecodeRGB                7183606      6794668   -5.41%

Wall time for Denis Cheremisov's PNG-decoding program given in
https://groups.google.com/group/golang-nuts/browse_thread/thread/22aa8a05040fdd49
Before: 3.07s
After:  2.32s
Delta:  -24%

Before profile:
Total: 304 samples
         159  52.3%  52.3%      251  82.6% compress/flate.(*decompressor).huffmanBlock
          58  19.1%  71.4%       76  25.0% compress/flate.(*decompressor).huffSym
          32  10.5%  81.9%       32  10.5% hash/adler32.update
          16   5.3%  87.2%       22   7.2% bufio.(*Reader).ReadByte
          16   5.3%  92.4%       37  12.2% compress/flate.(*decompressor).moreBits
           7   2.3%  94.7%        7   2.3% hash/crc32.update
           7   2.3%  97.0%        7   2.3% runtime.memmove
           5   1.6%  98.7%        5   1.6% scanblock
           2   0.7%  99.3%        9   3.0% runtime.copy
           1   0.3%  99.7%        1   0.3% compress/flate.(*huffmanDecoder).init

After profile:
Total: 230 samples
          59  25.7%  25.7%       70  30.4% compress/flate.(*decompressor).huffSym
          45  19.6%  45.2%       45  19.6% hash/adler32.update
          35  15.2%  60.4%       35  15.2% compress/flate.forwardCopy
          20   8.7%  69.1%      151  65.7% compress/flate.(*decompressor).huffmanBlock
          16   7.0%  76.1%       24  10.4% compress/flate.(*decompressor).moreBits
          15   6.5%  82.6%       15   6.5% runtime.memmove
          11   4.8%  87.4%       50  21.7% compress/flate.(*decompressor).copyHist
           7   3.0%  90.4%        7   3.0% hash/crc32.update
           6   2.6%  93.0%        9   3.9% bufio.(*Reader).ReadByte
           4   1.7%  94.8%        4   1.7% runtime.slicearray

R=rsc, rogpeppe, dave
CC=golang-dev, krasin
https://golang.org/cl/6127064
This commit is contained in:
Nigel Tao 2012-05-01 10:51:34 +10:00
parent 84ce0f7687
commit 4de15a5cda
3 changed files with 88 additions and 31 deletions

View File

@ -0,0 +1,17 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// forwardCopy is like the built-in copy function except that it always goes
// forward from the start, even if the dst and src overlap.
func forwardCopy(dst, src []byte) int {
if len(src) > len(dst) {
src = src[:len(dst)]
}
for i, x := range src {
dst[i] = x
}
return len(src)
}

View File

@ -0,0 +1,42 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"testing"
)
func TestForwardCopy(t *testing.T) {
testCases := []struct {
dst0, dst1 int
src0, src1 int
want string
}{
{0, 9, 0, 9, "012345678"},
{0, 5, 4, 9, "45678"},
{4, 9, 0, 5, "01230"},
{1, 6, 3, 8, "34567"},
{3, 8, 1, 6, "12121"},
{0, 9, 3, 6, "345"},
{3, 6, 0, 9, "012"},
{1, 6, 0, 9, "00000"},
{0, 4, 7, 8, "7"},
{0, 1, 6, 8, "6"},
{4, 4, 6, 9, ""},
{2, 8, 6, 6, ""},
{0, 0, 0, 0, ""},
}
for _, tc := range testCases {
b := []byte("012345678")
dst := b[tc.dst0:tc.dst1]
src := b[tc.src0:tc.src1]
n := forwardCopy(dst, src)
got := string(dst[:n])
if got != tc.want {
t.Errorf("dst=b[%d:%d], src=b[%d:%d]: got %q, want %q",
tc.dst0, tc.dst1, tc.src0, tc.src1, got, tc.want)
}
}
}

View File

@ -505,51 +505,49 @@ func (f *decompressor) huffmanBlock() {
return
}
p := f.hp - dist
if p < 0 {
p += len(f.hist)
}
for i := 0; i < length; i++ {
f.hist[f.hp] = f.hist[p]
f.hp++
p++
if f.hp == len(f.hist) {
// After flush continue copying out of history.
f.copyLen = length - (i + 1)
f.copyDist = dist
f.flush((*decompressor).copyHuff)
return
}
if p == len(f.hist) {
p = 0
}
f.copyLen, f.copyDist = length, dist
if f.copyHist() {
return
}
}
panic("unreached")
}
func (f *decompressor) copyHuff() {
length := f.copyLen
dist := f.copyDist
p := f.hp - dist
// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
// It reports whether the f.hist buffer is full.
func (f *decompressor) copyHist() bool {
p := f.hp - f.copyDist
if p < 0 {
p += len(f.hist)
}
for i := 0; i < length; i++ {
f.hist[f.hp] = f.hist[p]
f.hp++
p++
for f.copyLen > 0 {
n := f.copyLen
if x := len(f.hist) - f.hp; n > x {
n = x
}
if x := len(f.hist) - p; n > x {
n = x
}
forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
p += n
f.hp += n
f.copyLen -= n
if f.hp == len(f.hist) {
f.copyLen = length - (i + 1)
// After flush continue copying out of history.
f.flush((*decompressor).copyHuff)
return
return true
}
if p == len(f.hist) {
p = 0
}
}
return false
}
// Continue processing Huffman block.
func (f *decompressor) copyHuff() {
if f.copyHist() {
return
}
f.huffmanBlock()
}
@ -584,9 +582,9 @@ func (f *decompressor) dataBlock() {
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
// Read f.dataLen bytes into history,
// pausing for reads as history fills.
n := f.copyLen
for n > 0 {
m := len(f.hist) - f.hp