Skip to content

Commit cc1b171

Browse files
authored
[Prototype] Blame func (#844)
[Prototype] Blame func
2 parents 97d9667 + 339e8a9 commit cc1b171

File tree

9 files changed

+380
-13
lines changed

9 files changed

+380
-13
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased]
88

9+
- Added BLAME function.
10+
911
## [0.24.0-rc2] - 2019-10-02
1012

1113
## Fixed

docs/using-gitbase/examples.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,41 @@ The output will be similar to this:
200200
+-----------------+--------------------------------------+---------------+------------------+--------------------+
201201
```
202202

203+
## Get miscellaneous information about lines with a "// TODO" comment in HEAD
204+
205+
```sql
206+
SELECT repository_id,
207+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.commit")),
208+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.file")),
209+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.linenum")),
210+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.author")),
211+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.text"))
212+
FROM (SELECT repository_id,
213+
EXPLODE(BLAME(repository_id, commit_hash)) AS bl
214+
FROM ref_commits
215+
NATURAL JOIN blobs
216+
WHERE ref_name = 'HEAD'
217+
AND NOT IS_BINARY(blob_content)
218+
) as p
219+
WHERE JSON_EXTRACT(bl, "$.text") LIKE '%// TODO%';
220+
```
221+
222+
## Report of the number of lines authored by each author in HEAD
223+
224+
```sql
225+
SELECT
226+
JSON_UNQUOTE(JSON_EXTRACT(bl, "$.author")),
227+
COUNT(JSON_UNQUOTE(JSON_EXTRACT(bl, "$.author")))
228+
229+
FROM (SELECT EXPLODE(BLAME(repository_id, commit_hash)) AS bl
230+
FROM ref_commits
231+
NATURAL JOIN blobs
232+
WHERE ref_name = 'HEAD'
233+
AND NOT IS_BINARY(blob_content)
234+
) AS p
235+
GROUP BY JSON_UNQUOTE(JSON_EXTRACT(bl, "$.author"));
236+
```
237+
203238
# UAST UDFs Examples
204239

205240
First of all, you should check out the [bblfsh documentation](https://docs.sourced.tech/babelfish) to get yourself familiar with UAST concepts.

docs/using-gitbase/functions.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,21 @@ To make some common tasks easier for the user, there are some functions to inter
66

77
| Name | Description |
88
|:-------------|:-------------------------------------------------------------------------------------------------------------------------------|
9-
|`commit_stats(repository_id, [from_commit_hash], to_commit_hash) json`|returns the stats between two commits for a repository. If `from_commit_hash` is empty, it will compare the given `to_commit_hash` with its parent commit. Vendored files stats are not included in the result of this function. This function is more thoroughly explained later in this document.|
9+
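|`blame(repository, commit) json array`| returns an array with one entry per line of every file in the given commit, including the file, line number, author and text of the line. |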
1010
|`commit_file_stats(repository_id, [from_commit_hash], to_commit_hash) json array`|returns an array with the stats of each file in `to_commit_hash` since the given `from_commit_hash`. If `from_commit_hash` is not given, the parent commit will be used. Vendored files stats are not included in the result of this function. This function is more thoroughly explained later in this document.|
11-
|`is_remote(reference_name)bool`| checks if the given reference name is from a remote one. |
12-
|`is_tag(reference_name)bool`| checks if the given reference name is a tag. |
13-
|`is_vendor(file_path)bool`| checks if the given file name is a vendored file. |
11+
|`commit_stats(repository_id, [from_commit_hash], to_commit_hash) json`|returns the stats between two commits for a repository. If `from_commit_hash` is empty, it will compare the given `to_commit_hash` with its parent commit. Vendored files stats are not included in the result of this function. This function is more thoroughly explained later in this document.|
12+
|`is_remote(reference_name)bool`| checks if the given reference name is from a remote one. |
13+
|`is_tag(reference_name)bool`| checks if the given reference name is a tag. |
14+
|`is_vendor(file_path)bool`| checks if the given file name is a vendored file. |
1415
|`language(path, [blob])text`| gets the language of a file given its path and the optional content of the file. |
16+
|`loc(path, blob) json`| returns a JSON map, containing the lines of code of a file, separated in three categories: Code, Blank and Comment lines. |
1517
|`uast(blob, [lang, [xpath]]) blob`| returns a node array of UAST nodes in semantic mode. |
18+
|`uast_children(blob) blob`| returns a flattened array of the children UAST nodes from each one of the UAST nodes in the given array. |
19+
|`uast_extract(blob, key) text array`| extracts information identified by the given key from the uast nodes. |
1620
|`uast_mode(mode, blob, lang) blob`| returns a node array of UAST nodes specifying its language and mode (semantic, annotated or native). |
1721
|`uast_xpath(blob, xpath) blob`| performs an XPath query over the given UAST nodes. |
18-
|`uast_extract(blob, key) text array`| extracts information identified by the given key from the uast nodes. |
19-
|`uast_children(blob) blob`| returns a flattened array of the children UAST nodes from each one of the UAST nodes in the given array. |
20-
|`loc(path, blob) json`| returns a JSON map, containing the lines of code of a file, separated in three categories: Code, Blank and Comment lines. |
2122
|`version() text`| returns the gitbase version in the following format `8.0.11-{GITBASE_VERSION}` for compatibility with MySQL versioning. |
23+
2224
## Standard functions
2325

2426
These are all functions that are available because they are implemented in `go-mysql-server`, used by gitbase.
@@ -36,10 +38,10 @@ These are all functions that are available because they are implemented in `go-m
3638
|`CONCAT_WS(sep, ...)`| concatenates any group of fields into a single string. The first argument is the separator for the rest of the arguments. The separator is added between the strings to be concatenated. The separator can be a string, as can the rest of the arguments. If the separator is NULL, the result is NULL.|
3739
|`CONNECTION_ID()`| returns the current connection ID.|
3840
|`COUNT(expr)`| returns a count of the number of non-NULL values of expr in the rows retrieved by a SELECT statement.|
41+
|`DATE(date)`| returns the date part of the given `date`.|
3942
|`DATE_ADD(date, interval)`| adds the interval to the given `date`.|
4043
|`DATE_SUB(date, interval)`| subtracts the interval from the given `date`.|
4144
|`DAY(date)`| is a synonym for DAYOFMONTH().|
42-
|`DATE(date)`| returns the date part of the given `date`.|
4345
|`DAYOFMONTH(date)`| returns the day of the month (0-31).|
4446
|`DAYOFWEEK(date)`| returns the day of the week of the given `date`.|
4547
|`DAYOFYEAR(date)`| returns the day of the year of the given `date`.|

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ require (
3131
gopkg.in/src-d/go-billy-siva.v4 v4.6.0
3232
gopkg.in/src-d/go-billy.v4 v4.3.2
3333
gopkg.in/src-d/go-errors.v1 v1.0.0
34+
gopkg.in/src-d/go-git-fixtures.v3 v3.5.0
3435
gopkg.in/src-d/go-git.v4 v4.12.0
3536
gopkg.in/yaml.v2 v2.2.2
3637
vitess.io/vitess v3.0.0-rc.3.0.20190602171040-12bfde34629c+incompatible

go.sum

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ github.com/src-d/enry/v2 v2.0.0/go.mod h1:qQeCMRwzMF3ckeGr+h0tJLdxXnq+NVZsIDMELj
244244
github.com/src-d/envconfig v1.0.0/go.mod h1:Q9YQZ7BKITldTBnoxsE5gOeB5y66RyPXeue/R4aaNBc=
245245
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
246246
github.com/src-d/gcfg v1.4.0/go.mod h1:p/UMsR43ujA89BJY9duynAwIpvqEujIH/jFlfL7jWoI=
247-
github.com/src-d/go-borges v0.1.3 h1:EgNqVy6Mw6wT3mx5AIUWjOAJJJQ6yuiDimi8JV/R2hE=
247+
github.com/src-d/go-borges v0.1.1 h1:URkX6ycDzodt5n6mAF54cdjmmAMSWIyuL12ga/E5/sM=
248+
github.com/src-d/go-borges v0.1.1/go.mod h1:q/ufK0FXUS0ngH1Crd1TyirdtUM4dl/5ky2Oxiaw4YA=
248249
github.com/src-d/go-borges v0.1.3/go.mod h1:2lbHENELjpD+6bTOOnrnGEpVPMmO+wBGtYwzvQdr/Zc=
249250
github.com/src-d/go-git v4.7.0+incompatible h1:IYSSnbAHeKmsfbQFi9ozbid+KNh0bKjlorMfQehQbcE=
250251
github.com/src-d/go-git v4.7.0+incompatible/go.mod h1:1bQciz+hn0jzPQNsYj0hDFZHLJBdV7gXE2mWhC7EkFk=
@@ -330,7 +331,9 @@ golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5h
330331
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
331332
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
332333
golang.org/x/sys v0.0.0-20190520201301-c432e742b0af/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
333-
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e h1:D5TXcfTk7xF7hvieo4QErS3qqCB4teTffacDWr7CI+0=
334+
golang.org/x/sys v0.0.0-20190609082536-301114b31cce/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
335+
golang.org/x/sys v0.0.0-20190618155005-516e3c20635f h1:dHNZYIYdq2QuU6w73vZ/DzesPbVlZVYZTtTZmrnsbQ8=
336+
golang.org/x/sys v0.0.0-20190618155005-516e3c20635f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
334337
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
335338
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
336339
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
@@ -363,12 +366,12 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8
363366
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
364367
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
365368
gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo=
366-
gopkg.in/src-d/go-billy-siva.v4 v4.6.0 h1:HO5m7lqYewIZ3Otay3IkQg3gFznW8Gy9HIbHWm1mYX0=
369+
gopkg.in/src-d/go-billy-siva.v4 v4.5.1 h1:+UdpGGmJjANhXwg6TCcTVbACUqsbtX19QvJ9AdeX4ts=
370+
gopkg.in/src-d/go-billy-siva.v4 v4.5.1/go.mod h1:4wKeCzOCSsdyFeM5+58M6ObU6FM+lZT12p7zm7A+9n0=
367371
gopkg.in/src-d/go-billy-siva.v4 v4.6.0/go.mod h1:EcgzPxovlWGD+lZFFriUleL3EVZ/SPs6CH2FOE/eooI=
368372
gopkg.in/src-d/go-billy.v4 v4.2.1/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk=
369373
gopkg.in/src-d/go-billy.v4 v4.3.0 h1:KtlZ4c1OWbIs4jCv5ZXrTqG8EQocr0g/d4DjNg70aek=
370374
gopkg.in/src-d/go-billy.v4 v4.3.0/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk=
371-
gopkg.in/src-d/go-billy.v4 v4.3.2 h1:0SQA1pRztfTFx2miS8sA97XvooFeNOmvUenF4o0EcVg=
372375
gopkg.in/src-d/go-billy.v4 v4.3.2/go.mod h1:nDjArDMp+XMs1aFAESLRjfGSgfvoYN0hDfzEk0GjC98=
373376
gopkg.in/src-d/go-errors.v1 v1.0.0 h1:cooGdZnCjYbeS1zb1s6pVAAimTdKceRrpn7aKOnNIfc=
374377
gopkg.in/src-d/go-errors.v1 v1.0.0/go.mod h1:q1cBlomlw2FnDBDNGlnh6X0jPihy+QxZfMMNxPCbdYg=
@@ -379,7 +382,8 @@ gopkg.in/src-d/go-git.v4 v4.11.0/go.mod h1:Vtut8izDyrM8BUVQnzJ+YvmNcem2J89EmfZYC
379382
gopkg.in/src-d/go-git.v4 v4.12.0 h1:CKgvBCJCcdfNnyXPYI4Cp8PaDDAmAPEN0CtfEdEAbd8=
380383
gopkg.in/src-d/go-git.v4 v4.12.0/go.mod h1:zjlNnzc1Wjn43v3Mtii7RVxiReNP0fIu9npcXKzuNp4=
381384
gopkg.in/src-d/go-log.v1 v1.0.2/go.mod h1:GN34hKP0g305ysm2/hctJ0Y8nWP3zxXXJ8GFabTyABE=
382-
gopkg.in/src-d/go-siva.v1 v1.7.0 h1:igjgSEFweZ2kEfRlGEJH767o8GJRiPWp8JmHDCe0Vdk=
385+
gopkg.in/src-d/go-siva.v1 v1.5.0 h1:WowvbZTlz0SPoV7WNCGktPSi2yRK78HPyXl7wYqDeHE=
386+
gopkg.in/src-d/go-siva.v1 v1.5.0/go.mod h1:tk1jnIXawd/PTlRNWdr5V5lC0PttNJmu1fv7wt7IZlw=
383387
gopkg.in/src-d/go-siva.v1 v1.7.0/go.mod h1:ChxMHSRkICHZ9IbTlG3ihkuG7gc2RZPsIYh7OaXYvic=
384388
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
385389
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE=

integration_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,18 @@ func TestIntegration(t *testing.T) {
509509
{"vendor/foo.go"},
510510
},
511511
},
512+
{
513+
`
514+
SELECT repository_id, JSON_EXTRACT(bl, "$.author"),
515+
COUNT(bl)
516+
FROM (
517+
SELECT repository_id, EXPLODE(BLAME(repository_id, commit_hash)) as bl
518+
FROM commits
519+
WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'
520+
) as p
521+
`,
522+
[]sql.Row{{"worktree", "[email protected]", int64(7235)}},
523+
},
512524
}
513525

514526
var pid uint64

internal/function/blame.go

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
package function
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/src-d/gitbase"
7+
"github.com/src-d/go-mysql-server/sql"
8+
"gopkg.in/src-d/go-git.v4"
9+
10+
"gopkg.in/src-d/go-git.v4/plumbing"
11+
"gopkg.in/src-d/go-git.v4/plumbing/object"
12+
)
13+
14+
type BlameGenerator struct {
15+
commit *object.Commit
16+
fIter *object.FileIter
17+
curLine int
18+
curFile *object.File
19+
lines []*git.Line
20+
}
21+
22+
func NewBlameGenerator(c *object.Commit, f *object.FileIter) (*BlameGenerator, error) {
23+
return &BlameGenerator{commit: c, fIter: f, curLine: -1}, nil
24+
}
25+
26+
func (g *BlameGenerator) loadNewFile() error {
27+
var err error
28+
g.curFile, err = g.fIter.Next()
29+
if err != nil {
30+
return err
31+
}
32+
33+
result, err := git.Blame(g.commit, g.curFile.Name)
34+
if err != nil {
35+
return err
36+
}
37+
38+
if len(result.Lines) == 0 {
39+
return g.loadNewFile()
40+
}
41+
42+
g.lines = result.Lines
43+
g.curLine = 0
44+
return nil
45+
}
46+
47+
func (g *BlameGenerator) Next() (interface{}, error) {
48+
if g.curLine == -1 || g.curLine >= len(g.lines) {
49+
err := g.loadNewFile()
50+
if err != nil {
51+
return nil, err
52+
}
53+
}
54+
55+
l := g.lines[g.curLine]
56+
b := BlameLine{
57+
Commit: g.commit.Hash.String(),
58+
File: g.curFile.Name,
59+
LineNum: g.curLine,
60+
Author: l.Author,
61+
Text: l.Text,
62+
}
63+
g.curLine++
64+
return b, nil
65+
}
66+
67+
func (g *BlameGenerator) Close() error {
68+
g.fIter.Close()
69+
return nil
70+
}
71+
72+
// Compile-time assertion that *BlameGenerator satisfies sql.Generator.
var _ sql.Generator = (*BlameGenerator)(nil)
73+
74+
type (
75+
// Blame implements git-blame function as UDF
76+
Blame struct {
77+
repo sql.Expression
78+
commit sql.Expression
79+
}
80+
81+
// BlameLine represents each line of git blame's output
82+
BlameLine struct {
83+
Commit string `json:"commit"`
84+
File string `json:"file"`
85+
LineNum int `json:"linenum"`
86+
Author string `json:"author"`
87+
Text string `json:"text"`
88+
}
89+
)
90+
91+
// NewBlame constructor
92+
func NewBlame(repo, commit sql.Expression) sql.Expression {
93+
return &Blame{repo, commit}
94+
}
95+
96+
func (b *Blame) String() string {
97+
return fmt.Sprintf("blame(%s, %s)", b.repo, b.commit)
98+
}
99+
100+
// Type implements the sql.Expression interface
101+
func (*Blame) Type() sql.Type {
102+
return sql.Array(sql.JSON)
103+
}
104+
105+
func (b *Blame) WithChildren(children ...sql.Expression) (sql.Expression, error) {
106+
if len(children) != 2 {
107+
return nil, sql.ErrInvalidChildrenNumber.New(b, len(children), 2)
108+
}
109+
110+
return NewBlame(children[0], children[1]), nil
111+
}
112+
113+
// Children implements the Expression interface.
114+
func (b *Blame) Children() []sql.Expression {
115+
return []sql.Expression{b.repo, b.commit}
116+
}
117+
118+
// IsNullable implements the Expression interface.
119+
func (b *Blame) IsNullable() bool {
120+
return b.repo.IsNullable() || (b.commit.IsNullable())
121+
}
122+
123+
// Resolved implements the Expression interface.
124+
func (b *Blame) Resolved() bool {
125+
return b.repo.Resolved() && b.commit.Resolved()
126+
}
127+
128+
// Eval implements the sql.Expression interface.
129+
func (b *Blame) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
130+
span, ctx := ctx.Span("gitbase.Blame")
131+
defer span.Finish()
132+
133+
repo, err := b.resolveRepo(ctx, row)
134+
if err != nil {
135+
ctx.Warn(0, err.Error())
136+
return nil, nil
137+
}
138+
139+
commit, err := b.resolveCommit(ctx, repo, row)
140+
if err != nil {
141+
ctx.Warn(0, err.Error())
142+
return nil, nil
143+
}
144+
145+
fIter, err := commit.Files()
146+
if err != nil {
147+
return nil, err
148+
}
149+
150+
bg, err := NewBlameGenerator(commit, fIter)
151+
if err != nil {
152+
return nil, err
153+
}
154+
155+
return bg, nil
156+
}
157+
158+
func (b *Blame) resolveCommit(ctx *sql.Context, repo *gitbase.Repository, row sql.Row) (*object.Commit, error) {
159+
str, err := exprToString(ctx, b.commit, row)
160+
if err != nil {
161+
return nil, err
162+
}
163+
164+
commitHash, err := repo.ResolveRevision(plumbing.Revision(str))
165+
if err != nil {
166+
h := plumbing.NewHash(str)
167+
commitHash = &h
168+
}
169+
to, err := repo.CommitObject(*commitHash)
170+
if err != nil {
171+
return nil, err
172+
}
173+
174+
return to, nil
175+
}
176+
177+
func (b *Blame) resolveRepo(ctx *sql.Context, r sql.Row) (*gitbase.Repository, error) {
178+
repoID, err := exprToString(ctx, b.repo, r)
179+
if err != nil {
180+
return nil, err
181+
}
182+
s, ok := ctx.Session.(*gitbase.Session)
183+
if !ok {
184+
return nil, gitbase.ErrInvalidGitbaseSession.New(ctx.Session)
185+
}
186+
return s.Pool.GetRepo(repoID)
187+
}

0 commit comments

Comments
 (0)