Skip to content

Commit 910b234

Browse files
committed
wip: add range to tokens
1 parent 1a4e228 commit 910b234

File tree

8 files changed

+43
-35
lines changed

8 files changed

+43
-35
lines changed

src/FsLex.Core/fslexast.fs

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@ open System.Collections.Generic
66
open System.Globalization
77
open FSharp.Text.Lexing
88

9-
type Ident = string
10-
type Code = string * Position
9+
type Ident = string * Range
10+
type Code = string * Range
1111

1212

1313
type ParseContext = {
@@ -144,10 +144,11 @@ type Regexp =
144144
| Inp of Input
145145
| Star of Regexp
146146
| Macro of Ident
147-
type Clause = Regexp * Code
148147

149-
type Rule = (Ident * Ident list * Clause list)
150-
type Macro = Ident * Regexp
148+
type Clause = { Matcher: Regexp; Code: Code }
149+
150+
type Rule = { Name: Ident; Arguments: Ident list; Clauses: Clause list }
151+
type Macro = { Name: Ident; Matcher: Regexp }
151152

152153
type Spec =
153154
{ TopCode: Code
@@ -192,7 +193,7 @@ type NfaNodeMap() =
192193
map.[nodeId] <-node
193194
node
194195

195-
let LexerStateToNfa ctx (macros: Map<string,_>) (clauses: Clause list) =
196+
let LexerStateToNfa ctx (macros: Map<string,Macro>) (clauses: Clause list) =
196197

197198
/// Table allocating node ids
198199
let nfaNodeMap = new NfaNodeMap()
@@ -201,7 +202,7 @@ let LexerStateToNfa ctx (macros: Map<string,_>) (clauses: Clause list) =
201202
let rec CompileRegexp re dest =
202203
match re with
203204
| Alt res ->
204-
let trs = res ctx |> List.map (fun re -> (Epsilon,CompileRegexp re dest))
205+
let trs = res ctx |> List.map (fun re -> (Epsilon, CompileRegexp re dest))
205206
nfaNodeMap.NewNfaNode(trs,[])
206207
| Seq res ->
207208
List.foldBack (CompileRegexp) res dest
@@ -224,9 +225,9 @@ let LexerStateToNfa ctx (macros: Map<string,_>) (clauses: Clause list) =
224225
let sre = CompileRegexp re nfaNode
225226
AddToMultiMap nfaNode.Transitions Epsilon sre
226227
nfaNodeMap.NewNfaNode([(Epsilon,sre); (Epsilon,dest)],[])
227-
| Macro m ->
228-
if not <| macros.ContainsKey(m) then failwithf "The macro %s is not defined" m
229-
CompileRegexp macros.[m] dest
228+
| Macro (name, _) as m ->
229+
if not <| macros.ContainsKey(name) then failwithf "The macro %s is not defined" name
230+
CompileRegexp macros.[name].Matcher dest
230231

231232
// These cases unwind the difficult cases in the syntax that rely on knowing the
232233
// entire alphabet.
@@ -274,13 +275,13 @@ let LexerStateToNfa ctx (macros: Map<string,_>) (clauses: Clause list) =
274275
let actions = new System.Collections.Generic.List<_>()
275276

276277
/// Compile an acceptance of a regular expression into the NFA
277-
let sTrans macros nodeId (regexp,code) =
278+
let sTrans macros nodeId { Matcher = regexp; Code = code } =
278279
let actionId = actions.Count
279280
actions.Add(code)
280-
let sAccept = nfaNodeMap.NewNfaNode([],[(nodeId,actionId)])
281+
let sAccept = nfaNodeMap.NewNfaNode([], [(nodeId, actionId)])
281282
CompileRegexp regexp sAccept
282283

283-
let trs = clauses |> List.mapi (fun n x -> (Epsilon,sTrans macros n x))
284+
let trs = clauses |> List.mapi (fun n x -> (Epsilon, sTrans macros n x))
284285
let nfaStartNode = nfaNodeMap.NewNfaNode(trs,[])
285286
nfaStartNode,(actions |> Seq.readonly), nfaNodeMap
286287

@@ -407,10 +408,10 @@ let NfaToDfa (nfaNodeMap:NfaNodeMap) nfaStartNode =
407408
ruleStartNode,ruleNodes
408409

409410
let Compile ctx spec =
410-
let macros = Map.ofList spec.Macros
411+
let macros = Map.ofList (spec.Macros |> List.map (fun m -> fst m.Name, m))
411412
List.foldBack
412-
(fun (name,args,clauses) (perRuleData,dfaNodes) ->
413-
let nfa, actions, nfaNodeMap = LexerStateToNfa ctx macros clauses
413+
(fun rule (perRuleData,dfaNodes) ->
414+
let nfa, actions, nfaNodeMap = LexerStateToNfa ctx macros rule.Clauses
414415
let ruleStartNode, ruleNodes = NfaToDfa nfaNodeMap nfa
415416
//printfn "name = %s, ruleStartNode = %O" name ruleStartNode.Id
416417
(ruleStartNode,actions) :: perRuleData, ruleNodes @ dfaNodes)

src/FsLex.Core/fslexdriver.fs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ type Writer(fileName) =
3636
member x.write fmt =
3737
Printf.fprintf os fmt
3838

39-
member x.writeCode (code, pos: Position) =
39+
member x.writeCode (code, { startPos = pos }) =
4040
if pos <> Position.Empty // If bottom code is unspecified, then position is empty.
4141
then
4242
x.writeLine "# %d \"%s\"" pos.Line pos.FileName
@@ -175,13 +175,13 @@ let writeRules (rules: Rule list) (perRuleData: PerRuleData) outputFileName (wri
175175
// These actions push the additional start state and come first, because they are then typically inlined into later
176176
// rules. This means more tailcalls are taken as direct branches, increasing efficiency and
177177
// improving stack usage on platforms that do not take tailcalls.
178-
for ((startNode, actions),(ident,args,_)) in List.zip perRuleData rules do
178+
for ((startNode, actions),{ Name = (ident, _); Arguments = args } ) in List.zip perRuleData rules do
179179
writer.writeLine "// Rule %s" ident
180180
writer.writeLine "and %s %s lexbuf =" ident (String.Join(" ", Array.ofList args))
181181
writer.writeLine " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id
182-
actions |> Seq.iteri (fun i (code:string, pos) ->
182+
actions |> Seq.iteri (fun i (code:string, range) ->
183183
writer.writeLine " | %d -> ( " i
184-
writer.writeLine "# %d \"%s\"" pos.Line pos.FileName
184+
writer.writeLine "# %d \"%s\"" range.startPos.Line range.startPos.FileName
185185
let lines = code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries)
186186
for line in lines do
187187
writer.writeLine " %s" line

src/FsLex.Core/fslexlex.fsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ and string p buff = parse
152152
| _ { let _ = buff.Append (lexeme lexbuf).[0] in
153153
string p buff lexbuf }
154154
and code p buff = parse
155-
| "}" { CODE (buff.ToString(), p) }
155+
| "}" { CODE (buff.ToString(), { startPos = p; endPos = lexbuf.EndPos }) }
156156
| "{" { let _ = buff.Append (lexeme lexbuf) in
157157
ignore(code p buff lexbuf);
158158
let _ = buff.Append "}" in

src/FsLex.Core/fslexpars.fsy

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,12 @@ spec:
2626
{ TopCode=$1;Macros=$2;Rules=$4;BottomCode=$5 }
2727
}
2828

29+
ident:
30+
| IDENT { $1, parseState.InputRange 1 }
31+
2932
codeopt:
3033
| CODE { $1 }
31-
| { "", (parseState.ResultRange |> fst) }
34+
| { "", parseState.ResultRange }
3235

3336
Macros:
3437
| { [] }
@@ -37,8 +40,8 @@ Macros:
3740
}
3841

3942
macro:
40-
| LET IDENT EQUALS regexp {
41-
($2, $4)
43+
| LET ident EQUALS regexp {
44+
{ Name = $2; Matcher = $4 }
4245
}
4346

4447
Rules:
@@ -48,13 +51,13 @@ Rules:
4851
| rule { [$1] }
4952

5053
rule:
51-
| IDENT args EQUALS PARSE optbar clauses {
52-
($1,$2,$6)
54+
| ident args EQUALS PARSE optbar clauses {
55+
{ Name = $1; Arguments = $2; Clauses = $6 }
5356
}
5457

5558
args:
5659
| { [] }
57-
| IDENT args { $1 :: $2 }
60+
| ident args { $1 :: $2 }
5861

5962
optbar:
6063
| { }
@@ -65,15 +68,15 @@ clauses:
6568
| clause { [$1] }
6669

6770
clause:
68-
| regexp CODE { $1, $2 }
71+
| regexp CODE { { Matcher = $1; Code = $2 } }
6972

7073
regexp:
7174
| CHAR { Inp(Alphabet(EncodeChar $1))}
7275
| UNICODE_CATEGORY { Inp(UnicodeCategory $1)}
7376
| EOF { Inp(Alphabet(fun ctx -> Eof))}
7477
| UNDERSCORE { Inp Any }
7578
| STRING { Seq([ for n in 0 .. $1.Length - 1 -> Inp(Alphabet(EncodeChar $1.[n]))])}
76-
| IDENT { Macro($1) }
79+
| ident { Macro($1) }
7780
| regexp regexp %prec regexp_seq { Seq[$1;$2] }
7881
| regexp PLUS %prec regexp_plus { Seq[$1;Star $1] }
7982
| regexp STAR %prec regexp_star { Star $1 }

src/FsLexYacc.Runtime/Lexing.fs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ module FSharp.Text.Lexing
66
open System.Collections.Generic
77

88
// REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. -jomo
9+
910
[<Struct>]
1011
type Position =
1112
{ pos_fname : string
@@ -61,6 +62,7 @@ type Position =
6162
pos_bol= 0
6263
pos_cnum=0 }
6364

65+
type [<Struct>] Range = { startPos: Position; endPos: Position }
6466
type LexBufferFiller<'char> =
6567
{ fillSync : (LexBuffer<'char> -> unit) option
6668
fillAsync : (LexBuffer<'char> -> Async<unit>) option }

src/FsLexYacc.Runtime/Lexing.fsi

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,8 @@ type Position =
7070
/// Get a position corresponding to the first line (line number 1) in a given file
7171
static member FirstLine : filename:string -> Position
7272

73+
type [<Struct>] Range = { startPos: Position; endPos: Position }
74+
7375
[<Sealed>]
7476
/// Input buffers consumed by lexers generated by <c>fslex.exe </c>
7577
type LexBuffer<'char> =

src/FsLexYacc.Runtime/Parsing.fs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,13 @@ open System
77
open System.Collections.Generic
88

99
type IParseState =
10-
abstract InputRange: int -> Position * Position
10+
abstract InputRange: int -> Range
1111

1212
abstract InputEndPosition: int -> Position
1313

1414
abstract InputStartPosition: int -> Position
1515

16-
abstract ResultRange: Position * Position
16+
abstract ResultRange: Range
1717

1818
abstract GetInput: int -> obj
1919

@@ -254,11 +254,11 @@ module Implementation =
254254

255255
let parseState =
256256
{ new IParseState with
257-
member __.InputRange(n) = ruleStartPoss.[n-1], ruleEndPoss.[n-1]
257+
member __.InputRange(n) = { startPos = ruleStartPoss.[n-1]; endPos = ruleEndPoss.[n-1] }
258258
member __.InputStartPosition(n) = ruleStartPoss.[n-1]
259259
member __.InputEndPosition(n) = ruleEndPoss.[n-1]
260260
member __.GetInput(n) = ruleValues.[n-1]
261-
member __.ResultRange = (lhsPos.[0], lhsPos.[1])
261+
member __.ResultRange = { startPos = lhsPos.[0]; endPos = lhsPos.[1] }
262262
member __.ParserLocalStore = (localStore :> IDictionary<_,_>)
263263
member __.RaiseError() = raise RecoverableParseError (* NOTE: this binding tests the fairly complex logic associated with an object expression implementing a generic abstract method *)
264264
}

src/FsLexYacc.Runtime/Parsing.fsi

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ open System.Collections.Generic
1010
/// The information accessible via the <c>parseState</c> value within parser actions.
1111
type IParseState =
1212
/// Get the start and end position for the terminal or non-terminal at a given index matched by the production
13-
abstract InputRange: index:int -> Position * Position
13+
abstract InputRange: index:int -> Range
1414

1515
/// Get the end position for the terminal or non-terminal at a given index matched by the production
1616
abstract InputEndPosition: int -> Position
@@ -19,7 +19,7 @@ type IParseState =
1919
abstract InputStartPosition: int -> Position
2020

2121
/// Get the full range of positions matched by the production
22-
abstract ResultRange: Position * Position
22+
abstract ResultRange: Range
2323

2424
/// Get the value produced by the terminal or non-terminal at the given position
2525
abstract GetInput : int -> obj

0 commit comments

Comments
 (0)