|
1 | 1 | package function
|
2 | 2 |
|
3 | 3 | import (
|
| 4 | + "encoding/binary" |
4 | 5 | "fmt"
|
| 6 | + "hash/crc32" |
5 | 7 |
|
| 8 | + lru "github.com/hashicorp/golang-lru" |
6 | 9 | enry "gopkg.in/src-d/enry.v1"
|
7 | 10 | "gopkg.in/src-d/go-mysql-server.v0/sql"
|
8 | 11 | )
|
9 | 12 |
|
| 13 | +const defaultLanguageCacheSize = 10000 |
| 14 | + |
| 15 | +var languageCache *lru.TwoQueueCache |
| 16 | + |
| 17 | +func init() { |
| 18 | + var err error |
| 19 | + languageCache, err = lru.New2Q(defaultLanguageCacheSize) |
| 20 | + if err != nil { |
| 21 | + panic(fmt.Errorf("cannot initialize language cache: %s", err)) |
| 22 | + } |
| 23 | +} |
| 24 | + |
10 | 25 | // Language gets the language of a file given its path and
|
11 | 26 | // the optional content of the file.
|
12 | 27 | type Language struct {
|
@@ -110,11 +125,64 @@ func (f *Language) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
|
110 | 125 | blob = right.([]byte)
|
111 | 126 | }
|
112 | 127 |
|
113 |
| - if lang := enry.GetLanguage(path, blob); lang != "" { |
114 |
| - return lang, nil |
| 128 | + var hash [8]byte |
| 129 | + if len(blob) > 0 { |
| 130 | + hash = languageHash(path, blob) |
| 131 | + value, ok := languageCache.Get(hash) |
| 132 | + if ok { |
| 133 | + return value, nil |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + lang := enry.GetLanguage(path, blob) |
| 138 | + if lang == "" { |
| 139 | + return nil, nil |
| 140 | + } |
| 141 | + |
| 142 | + if len(blob) > 0 { |
| 143 | + languageCache.Add(hash, lang) |
115 | 144 | }
|
116 | 145 |
|
117 |
| - return nil, nil |
| 146 | + return lang, nil |
| 147 | +} |
| 148 | + |
| 149 | +func languageHash(filename string, blob []byte) [8]byte { |
| 150 | + fh := filenameHash(filename) |
| 151 | + bh := blobHash(blob) |
| 152 | + |
| 153 | + var result [8]byte |
| 154 | + copy(result[:], fh) |
| 155 | + copy(result[4:], bh) |
| 156 | + return result |
| 157 | +} |
| 158 | + |
// blobPeekSize is the number of bytes blobHash samples from each end of
// a blob that is at least 2*blobPeekSize bytes long.
const blobPeekSize = 40

// blobHash returns the CRC-32 (IEEE) checksum of a sample of blob,
// encoded as 4 little-endian bytes. It returns nil for an empty blob.
//
// Blobs shorter than 2*blobPeekSize bytes are checksummed in full.
// Longer blobs are checksummed over their first and last blobPeekSize
// bytes only, so two long blobs that differ only in the middle produce
// the same hash.
func blobHash(blob []byte) []byte {
	if len(blob) == 0 {
		return nil
	}

	var sum uint32
	if len(blob) < blobPeekSize*2 {
		sum = crc32.ChecksumIEEE(blob)
	} else {
		// Feeding head and tail to the running CRC yields the same value
		// as checksumming their concatenation, without allocating and
		// filling a temporary buffer on every call.
		sum = crc32.Update(0, crc32.IEEETable, blob[:blobPeekSize])
		sum = crc32.Update(sum, crc32.IEEETable, blob[len(blob)-blobPeekSize:])
	}

	hash := make([]byte, 4)
	binary.LittleEndian.PutUint32(hash, sum)
	return hash
}
| 180 | + |
// filenameHash returns the CRC-32 (IEEE) checksum of filename encoded
// as 4 little-endian bytes.
func filenameHash(filename string) []byte {
	sum := make([]byte, 4)
	binary.LittleEndian.PutUint32(sum, crc32.Checksum([]byte(filename), crc32.IEEETable))
	return sum
}
|
119 | 187 |
|
120 | 188 | // Children implements the Expression interface.
|
|
0 commit comments