Skip to content

Commit f2081de

Browse files
authored
Merge pull request #457 from erizocosmico/feature/lang-cache
internal/function: use cache for expensive language operations
2 parents 62b614c + 2cabb19 commit f2081de

File tree

18 files changed

+2382
-3
lines changed

18 files changed

+2382
-3
lines changed

Gopkg.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@
66
name = "github.com/jessevdk/go-flags"
77
version = "1.3.0"
88

9+
[[constraint]]
10+
name = "github.com/hashicorp/golang-lru"
11+
version = "0.5.0"
12+
913
[[constraint]]
1014
name = "github.com/stretchr/testify"
1115
version = "1.1.4"

docs/using-gitbase/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
| `GITBASE_INDEX_DIR` | directory to save indexes, default `/var/lib/gitbase/index` |
1313
| `GITBASE_TRACE` | enable jaeger tracing, default disabled |
1414
| `GITBASE_READONLY` | allow read queries only, disabling creating and deleting indexes, default disabled |
15+
| `GITBASE_LANGUAGE_CACHE_SIZE` | size of the cache for the `language` UDF, expressed as the maximum number of elements kept in the cache, default `10000`; unset, non-numeric, zero or negative values fall back to the default |
1516

1617
### Jaeger tracing variables
1718

internal/function/language.go

Lines changed: 75 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,42 @@
11
package function
22

33
import (
4+
"encoding/binary"
45
"fmt"
6+
"hash/crc32"
7+
"os"
8+
"strconv"
59

10+
lru "github.com/hashicorp/golang-lru"
611
enry "gopkg.in/src-d/enry.v1"
712
"gopkg.in/src-d/go-mysql-server.v0/sql"
813
)
914

15+
const (
16+
languageCacheSizeKey = "GITBASE_LANGUAGE_CACHE_SIZE"
17+
defaultLanguageCacheSize = 10000
18+
)
19+
20+
func languageCacheSize() int {
21+
v := os.Getenv(languageCacheSizeKey)
22+
size, err := strconv.Atoi(v)
23+
if err != nil || size <= 0 {
24+
size = defaultLanguageCacheSize
25+
}
26+
27+
return size
28+
}
29+
30+
var languageCache *lru.TwoQueueCache
31+
32+
func init() {
33+
var err error
34+
languageCache, err = lru.New2Q(languageCacheSize())
35+
if err != nil {
36+
panic(fmt.Errorf("cannot initialize language cache: %s", err))
37+
}
38+
}
39+
1040
// Language gets the language of a file given its path and
1141
// the optional content of the file.
1242
type Language struct {
@@ -110,11 +140,53 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
110140
blob = right.([]byte)
111141
}
112142

113-
if lang := enry.GetLanguage(path, blob); lang != "" {
114-
return lang, nil
143+
var hash [8]byte
144+
if len(blob) > 0 {
145+
hash = languageHash(path, blob)
146+
value, ok := languageCache.Get(hash)
147+
if ok {
148+
return value, nil
149+
}
150+
}
151+
152+
lang := enry.GetLanguage(path, blob)
153+
if lang == "" {
154+
return nil, nil
155+
}
156+
157+
if len(blob) > 0 {
158+
languageCache.Add(hash, lang)
115159
}
116160

117-
return nil, nil
161+
return lang, nil
162+
}
163+
164+
func languageHash(filename string, blob []byte) [8]byte {
165+
fh := filenameHash(filename)
166+
bh := blobHash(blob)
167+
168+
var result [8]byte
169+
copy(result[:], fh)
170+
copy(result[4:], bh)
171+
return result
172+
}
173+
174+
func blobHash(blob []byte) []byte {
175+
if len(blob) == 0 {
176+
return nil
177+
}
178+
179+
n := crc32.ChecksumIEEE(blob)
180+
hash := make([]byte, 4)
181+
binary.LittleEndian.PutUint32(hash, n)
182+
return hash
183+
}
184+
185+
func filenameHash(filename string) []byte {
186+
n := crc32.ChecksumIEEE([]byte(filename))
187+
hash := make([]byte, 4)
188+
binary.LittleEndian.PutUint32(hash, n)
189+
return hash
118190
}
119191

120192
// Children implements the Expression interface.

vendor/github.com/hashicorp/golang-lru/.gitignore

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/hashicorp/golang-lru/2q.go

Lines changed: 223 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)