Skip to content

Commit ffaa70a

Browse files
committed
Cond. compilation of unsafe usage + safe fallback
The unsafe package is not available on some platforms (appengine, that I know of). On platforms that support it, unsafe is used; otherwise, a safe version is now used. Also added a slightly faster implementation using unsafe based on assumptions about the string/slice header format.
1 parent 4b32de2 commit ffaa70a

File tree

6 files changed

+241
-27
lines changed

6 files changed

+241
-27
lines changed

fastbytes.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package jsonparser
2+
3+
// BytesParseInt parses a base-10 integer directly from bytes, without first
// converting the input to a string.
//
// Accepted input is an optional leading '-' followed by one or more ASCII
// digits. Anything else (empty input, a lone "-", or any non-digit byte)
// yields (0, false).
//
// Overflow is NOT detected: values outside the int64 range wrap around
// silently (for example, 2^64 parses as 0 with ok == true).
func BytesParseInt(bytes []byte) (v int64, ok bool) {
	if len(bytes) == 0 {
		return 0, false
	}

	neg := false
	if bytes[0] == '-' {
		neg = true
		bytes = bytes[1:]
		// A sign with no digits after it is not a number.
		if len(bytes) == 0 {
			return 0, false
		}
	}

	for _, c := range bytes {
		if c < '0' || c > '9' {
			return 0, false
		}
		v = 10*v + int64(c-'0')
	}

	if neg {
		// For "-9223372036854775808" v has already wrapped to the minimum
		// int64, and negating that value yields itself, so the result is right.
		return -v, true
	}
	return v, true
}

fastbytes_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package jsonparser
2+
3+
import (
4+
"strconv"
5+
"testing"
6+
"unsafe"
7+
)
8+
9+
// ParseIntTest is one table entry for TestBytesParseInt.
type ParseIntTest struct {
	in    string // text handed to BytesParseInt
	out   int64  // expected value; only compared when parsing succeeds
	isErr bool   // true when BytesParseInt is expected to reject the input
}
14+
15+
var parseIntTests = []ParseIntTest{
16+
{
17+
in: "0",
18+
out: 0,
19+
},
20+
{
21+
in: "1",
22+
out: 1,
23+
},
24+
{
25+
in: "-1",
26+
out: -1,
27+
},
28+
{
29+
in: "12345",
30+
out: 12345,
31+
},
32+
{
33+
in: "-12345",
34+
out: -12345,
35+
},
36+
{
37+
in: "9223372036854775807",
38+
out: 9223372036854775807,
39+
},
40+
{
41+
in: "-9223372036854775808",
42+
out: -9223372036854775808,
43+
},
44+
{
45+
in: "18446744073709551616", // = 2^64; integer overflow is not detected
46+
out: 0,
47+
},
48+
49+
{
50+
in: "",
51+
isErr: true,
52+
},
53+
{
54+
in: "abc",
55+
isErr: true,
56+
},
57+
{
58+
in: "12345x",
59+
isErr: true,
60+
},
61+
{
62+
in: "123e5",
63+
isErr: true,
64+
},
65+
{
66+
in: "9223372036854775807x",
67+
isErr: true,
68+
},
69+
}
70+
71+
func TestBytesParseInt(t *testing.T) {
72+
for _, test := range parseIntTests {
73+
out, ok := BytesParseInt([]byte(test.in))
74+
if ok != !test.isErr {
75+
t.Errorf("Test '%s' error return did not match expectation (obtained %t, expected %t)", test.in, !ok, test.isErr)
76+
} else if ok && out != test.out {
77+
t.Errorf("Test '%s' did not return the expected value (obtained %d, expected %d)", test.in, out, test.out)
78+
}
79+
}
80+
}
81+
82+
func BenchmarkParseInt(b *testing.B) {
83+
bytes := []byte("123")
84+
for i := 0; i < b.N; i++ {
85+
BytesParseInt(bytes)
86+
}
87+
}
88+
89+
// BenchmarkParseIntUnsafeSlower measures an alternative approach: reinterpret
// the byte slice as a string through unsafe and hand it to strconv.ParseInt.
// It exists as a comparison point for BenchmarkParseInt.
func BenchmarkParseIntUnsafeSlower(b *testing.B) {
	input := []byte("123")
	for n := 0; n < b.N; n++ {
		asString := *(*string)(unsafe.Pointer(&input))
		strconv.ParseInt(asString, 10, 64)
	}
}

fastbytessafe.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// +build appengine appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"strconv"
7+
)
8+
9+
// See fastbytes_unsafe.go for explanation on why *[]byte is used (signatures must be consistent with those in that file)
10+
11+
// BytesEqualStr reports whether the bytes pointed to by abytes are equal to
// bstr. This is the variant that does not use the unsafe package; it takes a
// pointer to the slice only to keep the same signature as the unsafe variant.
func BytesEqualStr(abytes *[]byte, bstr string) bool {
	raw := *abytes
	return bstr == string(raw)
}
14+
15+
// BytesParseFloat parses the bytes pointed to by bytes as a floating-point
// number by copying them into a string and delegating to strconv.ParseFloat.
// prec is forwarded as ParseFloat's bitSize argument (32 or 64).
func BytesParseFloat(bytes *[]byte, prec int) (float64, error) {
	text := string(*bytes)
	return strconv.ParseFloat(text, prec)
}

fastbytesunsafe.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// +build !appengine,!appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"strconv"
7+
"unsafe"
8+
)
9+
10+
//
11+
// The reason for using *[]byte rather than []byte in parameters is an optimization. As of Go 1.6,
12+
// the compiler cannot perfectly inline the function when using a non-pointer slice. That is,
13+
// the non-pointer []byte parameter version is slower than if its function body is manually
14+
// inlined, whereas the pointer []byte version is equally fast to the manually inlined
15+
// version. Instruction count in assembly taken from "go tool compile" confirms this difference.
16+
//
17+
18+
// BytesEqualStr reports whether the bytes pointed to by abytesptr are equal to
// bstr, without copying them into a new string. It reinterprets the slice
// header in place as a string header, which assumes the two headers share the
// same leading layout.
func BytesEqualStr(abytesptr *[]byte, bstr string) bool {
	view := (*string)(unsafe.Pointer(abytesptr))
	return bstr == *view
}
21+
22+
// BytesParseFloat parses the bytes pointed to by bytesptr as a floating-point
// number via strconv.ParseFloat, with bitSize forwarded unchanged. The slice
// header is reinterpreted as a string through unsafe, so the bytes are not
// copied before parsing.
func BytesParseFloat(bytesptr *[]byte, bitSize int) (float64, error) {
	text := *(*string)(unsafe.Pointer(bytesptr))
	return strconv.ParseFloat(text, bitSize)
}

fastbytesunsafe_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// +build !appengine,!appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"reflect"
7+
"strings"
8+
"testing"
9+
"unsafe"
10+
)
11+
12+
// Benchmark fixtures. They are kept short because the difference between the
// three comparison methods is a constant overhead. The two values differ only
// in their final byte.
var benchmarkString = "0123456789x"

var benchmarkBytes = []byte("0123456789y")
18+
19+
// bytesEqualStrSafe is the reference comparison that uses no unsafe code: it
// converts the byte slice to a string and compares the result with bstr.
func bytesEqualStrSafe(abytes []byte, bstr string) bool {
	converted := string(abytes)
	return converted == bstr
}
22+
23+
func bytesEqualStrUnsafeSlower(abytes *[]byte, bstr string) bool {
24+
aslicehdr := (*reflect.SliceHeader)(unsafe.Pointer(abytes))
25+
astrhdr := reflect.StringHeader{Data: aslicehdr.Data, Len: aslicehdr.Len}
26+
return *(*string)(unsafe.Pointer(&astrhdr)) == bstr
27+
}
28+
29+
func TestEqual(t *testing.T) {
30+
if !BytesEqualStr(&[]byte{}, "") {
31+
t.Errorf(`BytesEqualStr("", ""): expected true, obtained false`)
32+
return
33+
}
34+
35+
longstr := strings.Repeat("a", 1000)
36+
for i := 0; i < len(longstr); i++ {
37+
s1, s2 := longstr[:i]+"1", longstr[:i]+"2"
38+
b1 := []byte(s1)
39+
40+
if !BytesEqualStr(&b1, s1) {
41+
t.Errorf(`BytesEqualStr("a"*%d + "1", "a"*%d + "1"): expected true, obtained false`, i, i)
42+
break
43+
}
44+
if BytesEqualStr(&b1, s2) {
45+
t.Errorf(`BytesEqualStr("a"*%d + "1", "a"*%d + "2"): expected false, obtained true`, i, i)
46+
break
47+
}
48+
}
49+
}
50+
51+
func BenchmarkBytesEqualStr(b *testing.B) {
52+
for i := 0; i < b.N; i++ {
53+
BytesEqualStr(&benchmarkBytes, benchmarkString)
54+
}
55+
}
56+
57+
// Alternative implementation without using unsafe
58+
func BenchmarkBytesEqualStrSafe(b *testing.B) {
59+
for i := 0; i < b.N; i++ {
60+
bytesEqualStrSafe(benchmarkBytes, benchmarkString)
61+
}
62+
}
63+
64+
// Alternative implementation using unsafe, but that is slower than the current implementation
65+
func BenchmarkBytesEqualStrUnsafeSlower(b *testing.B) {
66+
for i := 0; i < b.N; i++ {
67+
bytesEqualStrUnsafeSlower(&benchmarkBytes, benchmarkString)
68+
}
69+
}

parser.go

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@ import (
44
"bytes"
55
"errors"
66
"fmt"
7-
"reflect"
87
"strconv"
9-
"unsafe"
108
)
119

1210
// Errors
@@ -127,10 +125,10 @@ func searchKeys(data []byte, keys ...string) int {
127125

128126
// if string is a Key, and key level match
129127
if data[i] == ':' {
130-
key := unsafeBytesToString(data[keyBegin:keyEnd])
128+
candidateKey := data[keyBegin:keyEnd]
131129

132130
if keyLevel == level-1 && // If key nesting level match current object nested level
133-
keys[level-1] == key {
131+
BytesEqualStr(&candidateKey, keys[level-1]) {
134132
keyLevel++
135133
// If we found all keys in path
136134
if keyLevel == lk {
@@ -343,17 +341,6 @@ func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int
343341
return nil
344342
}
345343

346-
// GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols.
347-
func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
348-
v, _, _, e := Get(data, keys...)
349-
350-
if e != nil {
351-
return "", e
352-
}
353-
354-
return unsafeBytesToString(v), nil
355-
}
356-
357344
// GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
358345
// If key data type do not match, it will return an error.
359346
func GetString(data []byte, keys ...string) (val string, err error) {
@@ -372,7 +359,7 @@ func GetString(data []byte, keys ...string) (val string, err error) {
372359
return string(v), nil
373360
}
374361

375-
s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`)
362+
s, err := strconv.Unquote(`"` + string(v) + `"`)
376363

377364
return s, err
378365
}
@@ -391,7 +378,7 @@ func GetFloat(data []byte, keys ...string) (val float64, err error) {
391378
return 0, fmt.Errorf("Value is not a number: %s", string(v))
392379
}
393380

394-
val, err = strconv.ParseFloat(unsafeBytesToString(v), 64)
381+
val, err = BytesParseFloat(&v, 64)
395382
return
396383
}
397384

@@ -408,8 +395,11 @@ func GetInt(data []byte, keys ...string) (val int64, err error) {
408395
return 0, fmt.Errorf("Value is not a number: %s", string(v))
409396
}
410397

411-
val, err = strconv.ParseInt(unsafeBytesToString(v), 10, 64)
412-
return
398+
if val, ok := BytesParseInt(v); !ok {
399+
return 0, MalformedValueError
400+
} else {
401+
return val, nil
402+
}
413403
}
414404

415405
// GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
@@ -434,11 +424,3 @@ func GetBoolean(data []byte, keys ...string) (val bool, err error) {
434424

435425
return
436426
}
437-
438-
// A hack until issue golang/go#2632 is fixed.
439-
// See: https://github.com/golang/go/issues/2632
440-
func unsafeBytesToString(data []byte) string {
441-
h := (*reflect.SliceHeader)(unsafe.Pointer(&data))
442-
sh := reflect.StringHeader{Data: h.Data, Len: h.Len}
443-
return *(*string)(unsafe.Pointer(&sh))
444-
}

0 commit comments

Comments
 (0)