Skip to content

Commit 13d3d66

Browse files
authored
refactoring: remove un-used code, add go doc, fix ci (#199)
Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test
1 parent fe18dc0 commit 13d3d66

File tree

10 files changed

+51
-149
lines changed

10 files changed

+51
-149
lines changed

.travis.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,19 @@ jobs:
4545
stage: test
4646
language: scala
4747
jdk: oraclejdk8
48+
before_install:
49+
# mimics exact behavior of 'go_import_path' for non-go build image
50+
- export GOPATH=${TRAVIS_HOME}/gopath
51+
- mkdir -p ${GOPATH}/src/gopkg.in/src-d/enry.v1
52+
- tar -Pczf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_BUILD_DIR} . && tar -Pxzf ${TRAVIS_TMPDIR}/src_archive.tar.gz -C ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
53+
- export TRAVIS_BUILD_DIR=${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
54+
- cd ${TRAVIS_HOME}/gopath/src/gopkg.in/src-d/enry.v1
4855
install:
4956
- gimme version
5057
- eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=$GO_VERSION bash)"
5158
- go version
52-
- go get -v gopkg.in/src-d/enry.v1/...
59+
- echo $PWD; echo $GOPATH
60+
- go get -v ./...
5361
before_script:
5462
- cd java
5563
- make

cmd/enry/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ func printFileAnalysis(file string, limit int64, isJSON bool) error {
278278
// functions below can work on a sample
279279
fileType := getFileType(file, data)
280280
language := enry.GetLanguage(file, data)
281-
mimeType := enry.GetMimeType(file, language)
281+
mimeType := enry.GetMIMEType(file, language)
282282

283283
if isJSON {
284284
return json.NewEncoder(os.Stdout).Encode(map[string]interface{}{

common.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var DefaultStrategies = []Strategy{
2626
GetLanguagesByClassifier,
2727
}
2828

29+
// DefaultClassifier is a naive Bayes classifier based on Linguist samples.
2930
var DefaultClassifier Classifier = &classifier{
3031
languagesLogProbabilities: data.LanguagesLogProbabilities,
3132
tokensLogProbabilities: data.TokensLogProbabilities,

enry.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
/*
2+
Package enry implements multiple strategies for programming language identification.
3+
4+
Identification is made based on file name and file content using a series
5+
of strategies to narrow down the possible options.
6+
Each strategy is available as a separate API call, as well as a main entry point:
7+
8+
GetLanguage(filename string, content []byte) (language string)
9+
10+
It is a port of the https://github.com/github/linguist from Ruby.
11+
Upstream Linguist YAML files are used to generate data structures for the data
12+
package.
13+
*/
114
package enry // import "gopkg.in/src-d/enry.v1"
215

316
//go:generate make code-generate

java/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ $(RESOURCES_DIR): os-shared-lib
2828
cp -R $(RESOURCES_SRC) $(RESOURCES_DIR)
2929

3030
$(JNAERATOR_JAR): $(RESOURCES_DIR)
31-
mkdir $(JNAERATOR_DIR) && \
31+
mkdir -p $(JNAERATOR_DIR) && \
3232
wget $(JNAERATOR_JAR_URL) -O $(JNAERATOR_JAR)
3333

3434
os-shared-lib:

java/src/main/java/tech/sourced/enry/Enry.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,6 @@ public class Enry {
99

1010
private static final EnryLibrary nativeLib = EnryLibrary.INSTANCE;
1111

12-
/**
13-
* Returns whether the given language is auxiliary or not.
14-
*
15-
* @param language name of the language, e.g. PHP, HTML, ...
16-
* @return if it's an auxiliary language
17-
*/
18-
public static synchronized boolean isAuxiliaryLanguage(String language) {
19-
return toJavaBool(nativeLib.IsAuxiliaryLanguage(toGoString(language)));
20-
}
21-
2212
/**
2313
* Returns the language of the given file based on the filename and its
2414
* contents.

java/src/test/java/tech/sourced/enry/EnryTest.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,6 @@
66

77
public class EnryTest {
88

9-
@Test
10-
public void isAuxiliaryLanguage() {
11-
assertTrue(Enry.isAuxiliaryLanguage("HTML"));
12-
assertFalse(Enry.isAuxiliaryLanguage("Go"));
13-
}
14-
159
@Test
1610
public void getLanguage() {
1711
String code = "<?php $foo = bar();";

shared/enry.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,7 @@ func GetLanguagesByVimModeline(filename string, content []byte, candidates []str
9393

9494
//export GetMimeType
9595
func GetMimeType(path string, language string) string {
96-
return enry.GetMimeType(path, language)
97-
}
98-
99-
//export IsAuxiliaryLanguage
100-
func IsAuxiliaryLanguage(lang string) bool {
101-
return enry.IsAuxiliaryLanguage(lang)
96+
return enry.GetMIMEType(path, language)
10297
}
10398

10499
//export IsBinary

utils.go

Lines changed: 24 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,53 +8,20 @@ import (
88
"gopkg.in/src-d/enry.v1/data"
99
)
1010

11-
var (
12-
auxiliaryLanguages = map[string]bool{
13-
"Other": true, "XML": true, "YAML": true, "TOML": true, "INI": true,
14-
"JSON": true, "TeX": true, "Public Key": true, "AsciiDoc": true,
15-
"AGS Script": true, "VimL": true, "Diff": true, "CMake": true, "fish": true,
16-
"Awk": true, "Graphviz (DOT)": true, "Markdown": true, "desktop": true,
17-
"XSLT": true, "SQL": true, "RMarkdown": true, "IRC log": true,
18-
"reStructuredText": true, "Twig": true, "CSS": true, "Batchfile": true,
19-
"Text": true, "HTML+ERB": true, "HTML": true, "Gettext Catalog": true,
20-
"Smarty": true, "Raw token data": true,
21-
}
11+
const binSniffLen = 8000
2212

23-
configurationLanguages = map[string]bool{
24-
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
25-
}
26-
)
27-
28-
// IsAuxiliaryLanguage returns whether or not lang is an auxiliary language.
29-
func IsAuxiliaryLanguage(lang string) bool {
30-
_, ok := auxiliaryLanguages[lang]
31-
return ok
13+
var configurationLanguages = map[string]bool{
14+
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true,
3215
}
3316

34-
// IsConfiguration returns whether or not path is using a configuration language.
17+
// IsConfiguration tells if path, judged by its extension, is in one of the configuration languages.
3518
func IsConfiguration(path string) bool {
3619
language, _ := GetLanguageByExtension(path)
3720
_, is := configurationLanguages[language]
3821
return is
3922
}
4023

41-
// IsDotFile returns whether or not path has dot as a prefix.
42-
func IsDotFile(path string) bool {
43-
path = filepath.Clean(path)
44-
base := filepath.Base(path)
45-
return strings.HasPrefix(base, ".") && base != "." && base != ".."
46-
}
47-
48-
// IsVendor returns whether or not path is a vendor path.
49-
func IsVendor(path string) bool {
50-
return data.VendorMatchers.Match(path)
51-
}
52-
53-
// IsDocumentation returns whether or not path is a documentation path.
54-
func IsDocumentation(path string) bool {
55-
return data.DocumentationMatchers.Match(path)
56-
}
57-
24+
// IsImage tells if a given file is an image (PNG, JPEG or GIF format).
5825
func IsImage(path string) bool {
5926
extension := filepath.Ext(path)
6027
if extension == ".png" || extension == ".jpg" || extension == ".jpeg" || extension == ".gif" {
@@ -64,7 +31,8 @@ func IsImage(path string) bool {
6431
return false
6532
}
6633

67-
func GetMimeType(path string, language string) string {
34+
// GetMIMEType returns a MIME type of a given file based on its language.
35+
func GetMIMEType(path string, language string) string {
6836
if mime, ok := data.LanguagesMime[language]; ok {
6937
return mime
7038
}
@@ -76,13 +44,27 @@ func GetMimeType(path string, language string) string {
7644
return "text/plain"
7745
}
7846

79-
const sniffLen = 8000
47+
// IsDocumentation returns whether or not path is a documentation path.
48+
func IsDocumentation(path string) bool {
49+
return data.DocumentationMatchers.Match(path)
50+
}
51+
52+
// IsDotFile returns whether or not path has dot as a prefix.
53+
func IsDotFile(path string) bool {
54+
base := filepath.Base(filepath.Clean(path))
55+
return strings.HasPrefix(base, ".") && base != "."
56+
}
57+
58+
// IsVendor returns whether or not path is a vendor path.
59+
func IsVendor(path string) bool {
60+
return data.VendorMatchers.Match(path)
61+
}
8062

8163
// IsBinary detects if data is a binary value based on:
8264
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
8365
func IsBinary(data []byte) bool {
84-
if len(data) > sniffLen {
85-
data = data[:sniffLen]
66+
if len(data) > binSniffLen {
67+
data = data[:binSniffLen]
8668
}
8769

8870
if bytes.IndexByte(data, byte(0)) == -1 {
@@ -91,17 +73,3 @@ func IsBinary(data []byte) bool {
9173

9274
return true
9375
}
94-
95-
// FileCount type stores language name and count of files belonging to the
96-
// language.
97-
type FileCount struct {
98-
Name string
99-
Count int
100-
}
101-
102-
// FileCountList type is a list of FileCounts.
103-
type FileCountList []FileCount
104-
105-
func (fcl FileCountList) Len() int { return len(fcl) }
106-
func (fcl FileCountList) Less(i, j int) bool { return fcl[i].Count < fcl[j].Count }
107-
func (fcl FileCountList) Swap(i, j int) { fcl[i], fcl[j] = fcl[j], fcl[i] }

utils_test.go

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,11 @@ package enry
33
import (
44
"bytes"
55
"fmt"
6-
"sort"
76
"testing"
87

98
"github.com/stretchr/testify/assert"
109
)
1110

12-
func TestIsAuxiliaryLanguage(t *testing.T) {
13-
type testType struct {
14-
name string
15-
lang string
16-
expected bool
17-
}
18-
19-
tests := []testType{
20-
{name: "TestIsAuxiliaryLanguage_Invalid", lang: "invalid", expected: false},
21-
}
22-
for k := range auxiliaryLanguages {
23-
t := testType{
24-
name: fmt.Sprintf("TestIsAuxiliaryLanguage_%s", k),
25-
lang: k,
26-
expected: true,
27-
}
28-
tests = append(tests, t)
29-
}
30-
31-
for _, test := range tests {
32-
is := IsAuxiliaryLanguage(test.lang)
33-
assert.Equal(t, is, test.expected,
34-
fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
35-
}
36-
}
37-
3811
func TestIsVendor(t *testing.T) {
3912
tests := []struct {
4013
name string
@@ -106,7 +79,7 @@ func TestGetMimeType(t *testing.T) {
10679
}
10780

10881
for _, test := range tests {
109-
is := GetMimeType(test.path, test.lang)
82+
is := GetMIMEType(test.path, test.lang)
11083
assert.Equal(t, is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
11184
}
11285
}
@@ -160,43 +133,3 @@ func TestIsDotFile(t *testing.T) {
160133
assert.Equal(t, test.expected, is, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected))
161134
}
162135
}
163-
164-
func TestFileCountListSort(t *testing.T) {
165-
sampleData := FileCountList{{"a", 8}, {"b", 65}, {"c", 20}, {"d", 90}}
166-
const ascending = "ASC"
167-
const descending = "DESC"
168-
169-
tests := []struct {
170-
name string
171-
data FileCountList
172-
order string
173-
expectedData FileCountList
174-
}{
175-
{
176-
name: "ascending order",
177-
data: sampleData,
178-
order: ascending,
179-
expectedData: FileCountList{{"a", 8}, {"c", 20}, {"b", 65}, {"d", 90}},
180-
},
181-
{
182-
name: "descending order",
183-
data: sampleData,
184-
order: descending,
185-
expectedData: FileCountList{{"d", 90}, {"b", 65}, {"c", 20}, {"a", 8}},
186-
},
187-
}
188-
189-
for _, test := range tests {
190-
t.Run(test.name, func(t *testing.T) {
191-
if test.order == descending {
192-
sort.Sort(sort.Reverse(test.data))
193-
} else {
194-
sort.Sort(test.data)
195-
}
196-
197-
for i := 0; i < len(test.data); i++ {
198-
assert.Equal(t, test.data[i], test.expectedData[i], fmt.Sprintf("%v: FileCount at position %d = %v, expected: %v", test.name, i, test.data[i], test.expectedData[i]))
199-
}
200-
})
201-
}
202-
}

0 commit comments

Comments
 (0)