Skip to content

Commit 3ac3d44

Browse files
committed
internal/function: use cache for expensive language operations
Signed-off-by: Miguel Molina <[email protected]>
1 parent 62b614c commit 3ac3d44

File tree

1 file changed

+71
-3
lines changed

1 file changed

+71
-3
lines changed

internal/function/language.go

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,27 @@
11
package function
22

33
import (
4+
"encoding/binary"
45
"fmt"
6+
"hash/crc32"
57

8+
lru "github.com/hashicorp/golang-lru"
69
enry "gopkg.in/src-d/enry.v1"
710
"gopkg.in/src-d/go-mysql-server.v0/sql"
811
)
912

13+
const defaultLanguageCacheSize = 10000
14+
15+
var languageCache *lru.TwoQueueCache
16+
17+
func init() {
18+
var err error
19+
languageCache, err = lru.New2Q(defaultLanguageCacheSize)
20+
if err != nil {
21+
panic(fmt.Errorf("cannot initialize language cache: %s", err))
22+
}
23+
}
24+
1025
// Language gets the language of a file given its path and
1126
// the optional content of the file.
1227
type Language struct {
@@ -110,11 +125,64 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
110125
blob = right.([]byte)
111126
}
112127

113-
if lang := enry.GetLanguage(path, blob); lang != "" {
114-
return lang, nil
128+
var hash [8]byte
129+
if len(blob) > 0 {
130+
hash = languageHash(path, blob)
131+
value, ok := languageCache.Get(hash)
132+
if ok {
133+
return value, nil
134+
}
135+
}
136+
137+
lang := enry.GetLanguage(path, blob)
138+
if lang == "" {
139+
return nil, nil
140+
}
141+
142+
if len(blob) > 0 {
143+
languageCache.Add(hash, lang)
115144
}
116145

117-
return nil, nil
146+
return lang, nil
147+
}
148+
149+
func languageHash(filename string, blob []byte) [8]byte {
150+
fh := filenameHash(filename)
151+
bh := blobHash(blob)
152+
153+
var result [8]byte
154+
copy(result[:], fh)
155+
copy(result[4:], bh)
156+
return result
157+
}
158+
159+
// blobPeekSize is the number of bytes taken from each end of a blob when
// computing its hash.
const blobPeekSize = 40

// blobHash returns the CRC-32 (IEEE) checksum of a sample of blob, encoded
// as 4 little-endian bytes. It returns nil for an empty blob.
//
// Blobs shorter than 2*blobPeekSize bytes are checksummed whole. For longer
// blobs only the first and the last blobPeekSize bytes are used, so two
// blobs that differ only in the middle produce the same hash.
func blobHash(blob []byte) []byte {
	if len(blob) == 0 {
		return nil
	}

	var sum uint32
	if len(blob) < blobPeekSize*2 {
		sum = crc32.ChecksumIEEE(blob)
	} else {
		// Feed the head and then the tail to the checksum incrementally. The
		// result equals the checksum of their concatenation, but avoids
		// copying both pieces into a temporary buffer first.
		sum = crc32.ChecksumIEEE(blob[:blobPeekSize])
		sum = crc32.Update(sum, crc32.IEEETable, blob[len(blob)-blobPeekSize:])
	}

	hash := make([]byte, 4)
	binary.LittleEndian.PutUint32(hash, sum)
	return hash
}
180+
181+
// filenameHash returns the CRC-32 (IEEE) checksum of filename encoded as 4
// little-endian bytes.
func filenameHash(filename string) []byte {
	sum := make([]byte, 4)
	binary.LittleEndian.PutUint32(sum, crc32.ChecksumIEEE([]byte(filename)))
	return sum
}
119187

120188
// Children implements the Expression interface.

0 commit comments

Comments
 (0)