Skip to content

Commit 0395b9d

Browse files
authored
Merge pull request #175 from erizocosmico/feature/blob-content
gitquery: add content column to blobs table
2 parents 552cde9 + b1d4c54 commit 0395b9d

File tree

3 files changed

+150
-4
lines changed

3 files changed

+150
-4
lines changed

blobs.go

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,38 @@
11
package gitquery
22

33
import (
4+
"bufio"
45
"io"
6+
"io/ioutil"
57

68
"gopkg.in/src-d/go-mysql-server.v0/sql"
79

810
"gopkg.in/src-d/go-git.v4/plumbing"
911
"gopkg.in/src-d/go-git.v4/plumbing/object"
1012
)
1113

14+
// Names of the environment variables that configure the blobs table.
const (
	// blobsMaxSizeKey is the variable holding the maximum blob size, expressed
	// in MiB, for which the content column is filled in.
	blobsMaxSizeKey = "GITQUERY_BLOBS_MAX_SIZE"
	// blobsAllowBinaryKey is the variable that enables returning the content
	// of binary blobs.
	blobsAllowBinaryKey = "GITQUERY_BLOBS_ALLOW_BINARY"
)

// Byte-size units, each one 1024 times larger than the previous.
const (
	b   = 1
	kib = b << 10
	mib = kib << 10
)
22+
23+
var (
24+
blobsAllowBinary = getBoolEnv(blobsAllowBinaryKey, false)
25+
blobsMaxSize = getIntEnv(blobsMaxSizeKey, 5) * mib
26+
)
27+
1228
type blobsTable struct {
1329
pool *RepositoryPool
1430
}
1531

1632
var blobsSchema = sql.Schema{
1733
{Name: "hash", Type: sql.Text, Nullable: false, Source: blobsTableName},
1834
{Name: "size", Type: sql.Int64, Nullable: false, Source: blobsTableName},
35+
{Name: "content", Type: sql.Blob, Nullable: false, Source: blobsTableName},
1936
}
2037

2138
// Compile-time check that *blobsTable implements
// sql.PushdownProjectionAndFiltersTable.
var _ sql.PushdownProjectionAndFiltersTable = (*blobsTable)(nil)
@@ -104,7 +121,7 @@ func (i *blobIter) Next() (sql.Row, error) {
104121
return nil, err
105122
}
106123

107-
return blobToRow(o), nil
124+
return blobToRow(o)
108125
}
109126

110127
func (i *blobIter) Close() error {
@@ -142,17 +159,75 @@ func (i *blobsByHashIter) Next() (sql.Row, error) {
142159
return nil, err
143160
}
144161

145-
return blobToRow(blob), nil
162+
return blobToRow(blob)
146163
}
147164
}
148165

149166
// Close is a no-op for blobsByHashIter; it always returns nil.
func (*blobsByHashIter) Close() error { return nil }
152169

153-
func blobToRow(c *object.Blob) sql.Row {
170+
func blobToRow(c *object.Blob) (sql.Row, error) {
171+
var content []byte
172+
var isAllowed = blobsAllowBinary
173+
if !isAllowed {
174+
ok, err := isBinary(c)
175+
if err != nil {
176+
return nil, err
177+
}
178+
isAllowed = !ok
179+
}
180+
181+
if c.Size <= int64(blobsMaxSize) && isAllowed {
182+
r, err := c.Reader()
183+
if err != nil {
184+
return nil, err
185+
}
186+
187+
content, err = ioutil.ReadAll(r)
188+
if err != nil {
189+
return nil, err
190+
}
191+
}
192+
154193
return sql.NewRow(
155194
c.Hash.String(),
156195
c.Size,
157-
)
196+
content,
197+
), nil
198+
}
199+
200+
const sniffLen = 8000
201+
202+
// isBinary detects if data is a binary value based on:
203+
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
204+
func isBinary(blob *object.Blob) (bool, error) {
205+
r, err := blob.Reader()
206+
if err != nil {
207+
return false, err
208+
}
209+
210+
defer r.Close()
211+
212+
rd := bufio.NewReader(r)
213+
var i int
214+
for {
215+
if i >= sniffLen {
216+
return false, nil
217+
}
218+
i++
219+
220+
b, err := rd.ReadByte()
221+
if err == io.EOF {
222+
return false, nil
223+
}
224+
225+
if err != nil {
226+
return false, err
227+
}
228+
229+
if b == 0 {
230+
return true, nil
231+
}
232+
}
158233
}

blobs_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,50 @@ func TestBlobsTable_RowIter(t *testing.T) {
4949
}
5050
}
5151

52+
func TestBlobsLimit(t *testing.T) {
53+
require := require.New(t)
54+
session, _, cleanup := setup(t)
55+
defer cleanup()
56+
57+
prev := blobsMaxSize
58+
blobsMaxSize = 200000
59+
defer func() {
60+
blobsMaxSize = prev
61+
}()
62+
63+
table := newBlobsTable(session.Pool)
64+
iter, err := table.RowIter(session)
65+
require.NoError(err)
66+
67+
rows, err := sql.RowIterToRows(iter)
68+
require.NoError(err)
69+
70+
expected := []struct {
71+
hash string
72+
bytes int64
73+
empty bool
74+
}{
75+
{"32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", 189, false},
76+
{"d3ff53e0564a9f87d8e84b6e28e5060e517008aa", 18, false},
77+
{"c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", 1072, false},
78+
{"7e59600739c96546163833214c36459e324bad0a", 9, false},
79+
{"d5c0f4ab811897cadf03aec358ae60d21f91c50d", 76110, true}, // is binary
80+
{"880cd14280f4b9b6ed3986d6671f907d7cc2a198", 2780, false},
81+
{"49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", 217848, true}, // exceeds threshold
82+
{"c8f1d8c61f9da76f4cb49fd86322b6e685dba956", 706, false},
83+
{"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", 11488, false},
84+
{"9dea2395f5403188298c1dabe8bdafe562c491e3", 78, false},
85+
}
86+
87+
require.Len(rows, len(expected))
88+
for i, row := range rows {
89+
e := expected[i]
90+
require.Equal(e.hash, row[0].(string))
91+
require.Equal(e.bytes, row[1].(int64))
92+
require.Equal(e.empty, len(row[2].([]byte)) == 0)
93+
}
94+
}
95+
5296
func TestBlobsPushdown(t *testing.T) {
5397
require := require.New(t)
5498
session, _, cleanup := setup(t)

env.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package gitquery
2+
3+
import (
4+
"os"
5+
"strconv"
6+
)
7+
8+
// getIntEnv returns the value of the environment variable key parsed as a
// decimal integer. defaultValue is returned when the variable is unset, empty
// or cannot be parsed by strconv.Atoi.
func getIntEnv(key string, defaultValue int) int {
	// An unset or empty variable yields "", which Atoi rejects, so a single
	// error check covers every fallback case.
	parsed, err := strconv.Atoi(os.Getenv(key))
	if err != nil {
		return defaultValue
	}
	return parsed
}
19+
20+
// getBoolEnv reports whether the environment variable key is enabled.
//
// Only the presence of the variable matters: if it is set, whatever its
// value (even "false", "0" or the empty string), the result is true. When it
// is not set, defaultValue is returned.
func getBoolEnv(key string, defaultValue bool) bool {
	_, isSet := os.LookupEnv(key)
	return isSet || defaultValue
}

0 commit comments

Comments
 (0)