@@ -9,24 +9,15 @@ package flex
 import "C"
 import "unsafe"
 
-// TokenizeC is only calling a C-flex based tokenizer from linguist
-func TokenizeC(content []byte) []string {
-	cs := C.CBytes(content)
-	defer C.free(unsafe.Pointer(cs))
-	// C.tokenizer_extract_tokens((*C.char)(cs))
-	return nil
-}
-
 const maxTokenLen = 32
 
 
 // TokenizeFlex implements tokenizer by calling Flex generated code from linguist in C
+// This is a transliteration from C https://github.com/github/linguist/blob/master/ext/linguist/linguist.c#L12
 func TokenizeFlex(content []byte) []string {
 	var buf C.YY_BUFFER_STATE
 	var scanner C.yyscan_t
 	var extra C.struct_tokenizer_extra
-	// var scanner *C.yyscan_t = (*C.yyscan_t)(C.malloc(C.sizeof_yyscan_t))
-	// var extra *C.struct_tokenizer_extra = (*C.struct_tokenizer_extra)(C.malloc(C.sizeof_struct_tokenizer_extra))
 	var _len C.ulong
 	var r C.int
 
@@ -50,7 +41,6 @@ func TokenizeFlex(content []byte) []string {
 			_len = C.strlen(extra.token)
 			if (_len <= maxTokenLen) {
 				ary = append(ary, C.GoStringN(extra.token, (C.int)(_len)))
-				//rb_ary_push(ary, rb_str_new(extra.token, len))
 			}
 			C.free(unsafe.Pointer(extra.token))
 			break
@@ -59,9 +49,6 @@ func TokenizeFlex(content []byte) []string {
 			if (_len <= maxTokenLen) {
 				s := "SHEBANG#!" + C.GoStringN(extra.token, (C.int)(_len))
 				ary = append(ary, s)
-				//s = rb_str_new2("SHEBANG#!");
-				//rb_str_cat(s, extra.token, len);
-				//rb_ary_push(ary, s);
 			}
 			C.free(unsafe.Pointer(extra.token))
 			break
@@ -70,9 +57,6 @@ func TokenizeFlex(content []byte) []string {
 			if (_len <= maxTokenLen) {
 				s := C.GoStringN(extra.token, (C.int)(_len)) + ">"
 				ary = append(ary, s)
-				//s = rb_str_new(extra.token, len);
-				//rb_str_cat2(s, ">");
-				//rb_ary_push(ary, s);
 			}
 			C.free(unsafe.Pointer(extra.token))
 			break
@@ -84,8 +68,6 @@ func TokenizeFlex(content []byte) []string {
 
 	C.linguist_yy_delete_buffer(buf, scanner)
 	C.linguist_yylex_destroy(scanner)
-	// C.free(unsafe.Pointer(extra))
-	// C.free(unsafe.Pointer(scanner))
 
 	return ary
 }
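
For context, a minimal usage sketch of the resulting API, not part of this change: the import path below is a placeholder, and building the flex package requires cgo plus the Flex-generated linguist sources referenced in the diff.

package main

import (
	"fmt"

	"example.com/tokenizer/flex" // hypothetical import path for the package above
)

func main() {
	src := []byte("#!/usr/bin/env python\n# sample input\n")

	// Per the diff: tokens longer than maxTokenLen (32) are dropped,
	// shebang tokens come back as "SHEBANG#!<name>", and SGML tag tokens end in ">".
	for _, tok := range flex.TokenizeFlex(src) {
		fmt.Println(tok)
	}
}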