Skip to content

Commit 342c7ce

Browse files
committed
Optimize decode for ~27-75% better performance.
This optimizes the decode function to significantly improve its performance. It accomplishes this by making the following overall changes:

- uses uint32 words to reduce the total number of multiplications that need to be done
- changes to a more efficient total size approximation which reduces the overall internal buffer sizes
- employs a stack allocated array for the internal array when working with typical sizes
- skips all leading zeros

Note that there is an additional heap allocation for larger inputs as compared to the existing code, but the overall perf gains on such large inputs are even better than on the more typical smaller inputs due to fewer overall calculations, which more than makes up for it, as can be seen in the benchmarks.

Finally, in an effort to help ensure correctness, the new code was fuzz tested for 24 hours on 16 cores for a total effective fuzz time of 384 hours with no issues found.

name                             old time/op    new time/op    delta
--------------------------------------------------------------------
Base58Decode/20_bytes_addrhash   240ns ± 2%     133ns ± 1%     -44.34%
Base58Decode/53_chars_wif        798ns ± 2%     299ns ± 1%     -62.51%
Base58Decode/111_chars_extkey    3.54µs ± 1%    1.01µs ± 2%    -71.51%
Base58Decode/50_zeros            128ns ± 1%     69ns ± 1%      -46.14%
Base58Decode/200_bytes_large     20.0µs ± 1%    5.1µs ± 0%     -74.29%
CheckDecode                      677ns ± 1%     493ns ± 0%     -27.24%

name                             old allocs/op  new allocs/op  delta
----------------------------------------------------------------------
Base58Decode/20_bytes_addrhash   1.00 ± 0%      1.00 ± 0%      ~
Base58Decode/53_chars_wif        1.00 ± 0%      1.00 ± 0%      ~
Base58Decode/111_chars_extkey    1.00 ± 0%      1.00 ± 0%      ~
Base58Decode/50_zeros            1.00 ± 0%      1.00 ± 0%      ~
Base58Decode/200_bytes_large     1.00 ± 0%      2.00 ± 0%      +100.00%
CheckDecode                      1.00 ± 0%      1.00 ± 0%      ~
1 parent a94b676 commit 342c7ce

1 file changed

Lines changed: 79 additions & 28 deletions

File tree

base58.go

Lines changed: 79 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
// Copyright (c) 2013-2015 The btcsuite developers
2-
// Copyright (c) 2015-2020 The Decred developers
2+
// Copyright (c) 2015-2025 The Decred developers
33
// Use of this source code is governed by an ISC
44
// license that can be found in the LICENSE file.
55

66
package base58
77

8+
import (
9+
"math/bits"
10+
)
11+
812
//go:generate go run genalphabet.go
913

1014
// Decode decodes a modified base58 string to a byte slice.
@@ -13,45 +17,92 @@ func Decode(input string) []byte {
1317
return []byte("")
1418
}
1519

16-
// The max possible output size is when a base58 encoding consists of
17-
// nothing but the alphabet character at index 0 which would result in the
18-
// same number of bytes as the number of input chars.
19-
output := make([]byte, len(input))
20+
// Determine the maximum possible output size.
21+
//
22+
// Since the conversion is from base58 to base256, the max possible number
23+
// of bytes of output per input byte, excluding the leading zeros, is
24+
// log_256(58). Therefore, the max total output size is the number of
25+
// leading zero bytes plus ceil(inputSizeMinusLeadingZeros * log_256(58)).
26+
//
27+
// Note that log_256(58) ~= 0.7322 < 47/64 which is within 0.3% of the true
28+
// value and efficient to compute as it only involves division by a power of
29+
// 2 and thus serves as a good approximation. So, the calculation below is
30+
// the integer division equivalent of nlz + ceil(len(input[nlz:]) * 47/64).
31+
//
32+
// Finally, in order to avoid additional conditional branches in the
33+
// conversion from uint32s to bytes, the max output size is rounded up to
34+
// the next multiple of 4.
35+
var nlz int
36+
for i := 0; i < len(input) && input[i] == alphabetIdx0; i++ {
37+
nlz++
38+
}
39+
maxOutputSizeNoLZ := (len(input[nlz:])*47 + 63) / 64
40+
maxOutputSize := nlz + maxOutputSizeNoLZ
41+
maxOutputSize = ((maxOutputSize + 3) / 4) * 4
42+
output := make([]byte, maxOutputSize)
43+
44+
// The algorithm below performs the calculations with uint32s for better
45+
// performance and the total number of uint32s is ceil(maxOutputSizeNoLZ /
46+
// 4). Note that the leading zeros are skipped here, so the calculation is
47+
// based on the max output size excluding them.
48+
//
49+
// In order to avoid an additional heap allocation for the vast majority of
50+
// typical cases, use an array on the stack for inputs of up to 120 chars
51+
// (plus any leading zeros) and fall back to a heap alloc for larger inputs.
52+
// Note that 120 input chars, excluding leading zeros, equates to a max
53+
// output size of 92 when applying the same calculations as above.
54+
//
55+
// This value was chosen because it provides a good balance between alloc
56+
// size, speed, and the max chars in the vast majority of inputs decoded in
57+
// the most common use cases.
58+
const maxOut32StackAlloc = 92 / 4
59+
maxOut32Size := (maxOutputSizeNoLZ + 3) / 4
60+
var out32 []uint32
61+
if maxOut32Size <= maxOut32StackAlloc {
62+
var out32Arr [maxOut32StackAlloc]uint32
63+
out32 = out32Arr[:maxOut32Size]
64+
} else {
65+
out32 = make([]uint32, maxOut32Size)
66+
}
2067

21-
// Encode to base256 in reverse order to avoid extra calculations to
22-
// determine the final output size in favor of just keeping track while
23-
// iterating.
24-
var index int
25-
for _, r := range []byte(input) {
68+
// Decode to base256 in reverse order to reduce the total number of overall
69+
// calculations.
70+
var out32Idx int
71+
for _, r := range []byte(input[nlz:]) {
2672
// Invalid base58 character.
27-
val := uint32(b58[r])
73+
val := uint64(b58[r])
2874
if val == 255 {
2975
return []byte("")
3076
}
3177

32-
// Multiply each byte in the output by 58 and encode to base256 while
33-
// propagating the carry.
34-
for i, b := range output[:index] {
35-
val += uint32(b) * 58
36-
output[i] = byte(val)
37-
val >>= 8
78+
for i, ui32 := range out32[:out32Idx] {
79+
val += uint64(ui32) * 58
80+
out32[i] = uint32(val) // nolint:gosec
81+
val >>= 32
3882
}
39-
for ; val > 0; val >>= 8 {
40-
output[index] = byte(val)
41-
index++
83+
if val > 0 {
84+
out32[out32Idx] = uint32(val) // nolint:gosec
85+
out32Idx++
4286
}
4387
}
4488

45-
// Account for the leading zeros in the input. They are appended since the
46-
// encoding is happening in reverse order.
47-
for _, r := range []byte(input) {
48-
if r != alphabetIdx0 {
49-
break
50-
}
89+
// Convert uint32 words to bytes.
90+
var index int
91+
for _, ui32 := range out32[:out32Idx] {
92+
output[index] = byte(ui32)
93+
output[index+1] = byte(ui32 >> 8)
94+
output[index+2] = byte(ui32 >> 16)
95+
output[index+3] = byte(ui32 >> 24)
96+
index += 4
97+
}
5198

52-
output[index] = 0
53-
index++
99+
// Adjust the output index to the position of the most significant byte and
100+
// to account for the leading zeros in the input. They come last since the
101+
// decoding is happening in reverse order.
102+
if out32Idx > 0 {
103+
index -= bits.LeadingZeros32(out32[out32Idx-1]) / 8
54104
}
105+
index += nlz
55106

56107
// Truncate the output buffer to the actual number of decoded bytes and
57108
// reverse it since it was calculated in reverse order.

0 commit comments

Comments
 (0)