fix alternate tokenizer
This commit is contained in:
5
TODO.md
5
TODO.md
@@ -4,10 +4,7 @@
|
|||||||
More comments in code! This is getting big enough that I need to re-find my bearings when fixing stuff.
|
More comments in code! This is getting big enough that I need to re-find my bearings when fixing stuff.
|
||||||
|
|
||||||
- [x] tokenizer that can be ported to newt
|
- [x] tokenizer that can be ported to newt
|
||||||
- [ ] string interpolation
|
- [ ] string interpolation?
|
||||||
|
|
||||||
- [ ] editor - indent newline on let with no in
|
|
||||||
- I've seen this done in vi for Idris, but it seems non-trivial in vscode.
|
|
||||||
- [x] Move on to next decl in case of error
|
- [x] Move on to next decl in case of error
|
||||||
- [x] for parse error, seek to col 0 token and process next decl
|
- [x] for parse error, seek to col 0 token and process next decl
|
||||||
- [ ] record update sugar, syntax TBD
|
- [ ] record update sugar, syntax TBD
|
||||||
|
|||||||
@@ -24,15 +24,15 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
Nil => Right $ toks <>> []
|
Nil => Right $ toks <>> []
|
||||||
' ' :: cs => tokenise' sl (sc + 1) toks cs
|
' ' :: cs => tokenise' sl (sc + 1) toks cs
|
||||||
'\n' :: cs => tokenise' (sl + 1) 0 toks cs
|
'\n' :: cs => tokenise' (sl + 1) 0 toks cs
|
||||||
'{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "{{" ) cs
|
'{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs
|
||||||
'}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "}}" ) cs
|
'}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs
|
||||||
',' :: cs => tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Ident ",") cs
|
',' :: cs => tokenise' sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs
|
||||||
'_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_,_") cs
|
'_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs
|
||||||
'_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_._") cs
|
'_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs
|
||||||
'\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) Character (singleton c)) cs
|
|
||||||
'\'' :: '\\' :: c :: '\'' :: cs =>
|
'\'' :: '\\' :: c :: '\'' :: cs =>
|
||||||
let ch = ifThenElse (c == 'n') '\n' c
|
let ch = ifThenElse (c == 'n') '\n' c
|
||||||
in tokenise' sl (sc + 4) (toks :< mktok sl (sc + 4) Character (singleton ch)) cs
|
in tokenise' sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs
|
||||||
|
'\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs
|
||||||
'#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
|
'#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
|
||||||
'-' :: '-' :: cs => lineComment sl (sc + 2) toks cs
|
'-' :: '-' :: cs => lineComment sl (sc + 2) toks cs
|
||||||
'/' :: '-' :: cs => blockComment sl (sc + 2) toks cs
|
'/' :: '-' :: cs => blockComment sl (sc + 2) toks cs
|
||||||
@@ -58,8 +58,8 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
|
|
||||||
|
|
||||||
-- temporary use same token as before
|
-- temporary use same token as before
|
||||||
mktok : Int -> Int -> Kind -> String -> BTok
|
mktok : Bool -> Int -> Int -> Kind -> String -> BTok
|
||||||
mktok el ec kind text = let kind = if elem text keywords then Keyword else kind in
|
mktok checkkw el ec kind text = let kind = if checkkw && elem text keywords then Keyword else kind in
|
||||||
MkBounded (Tok kind text) False (MkBounds sl sc el ec)
|
MkBounded (Tok kind text) False (MkBounds sl sc el ec)
|
||||||
|
|
||||||
lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
||||||
@@ -75,12 +75,12 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
|
|
||||||
|
|
||||||
doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok)
|
doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok)
|
||||||
doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok l c kind (pack $ acc <>> [])) Nil
|
doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil
|
||||||
doRest l c toks (ch :: cs) kind pred acc = if pred ch
|
doRest l c toks (ch :: cs) kind pred acc = if pred ch
|
||||||
then doRest l (c + 1) toks cs kind pred (acc :< ch)
|
then doRest l (c + 1) toks cs kind pred (acc :< ch)
|
||||||
else
|
else
|
||||||
let kind = if elem '_' acc then MixFix else kind in
|
let kind = if elem '_' acc then MixFix else kind in
|
||||||
tokenise' l c (toks :< mktok l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)
|
tokenise' l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)
|
||||||
|
|
||||||
doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
||||||
-- should be an error..
|
-- should be an error..
|
||||||
@@ -88,14 +88,15 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n')
|
doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n')
|
||||||
doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c)
|
doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c)
|
||||||
doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote
|
doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote
|
||||||
doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok line (col + 1) StringKind (pack $ acc <>> [])) cs
|
doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs
|
||||||
doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c)
|
doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c)
|
||||||
|
|
||||||
doChar : Char -> List Char -> Either Error (List BTok)
|
doChar : Char -> List Char -> Either Error (List BTok)
|
||||||
doChar c cs = if elem c standalone
|
doChar c cs = if elem c standalone
|
||||||
then tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Symbol (singleton c)) cs
|
then tokenise' sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs
|
||||||
else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
|
else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
|
||||||
doRest sl sc toks (c :: cs) kind isIdent [<]
|
doRest sl sc toks (c :: cs) kind isIdent [<]
|
||||||
|
|
||||||
export
|
export
|
||||||
tokenise : String -> String -> Either Error (List BTok)
|
tokenise : String -> String -> Either Error (List BTok)
|
||||||
tokenise fn text = case tokenise' 0 0 Lin (unpack text) of
|
tokenise fn text = case tokenise' 0 0 Lin (unpack text) of
|
||||||
|
|||||||
Reference in New Issue
Block a user