remove a Text.Parser dependency (about 10%), and alternate tokenizer
This commit is contained in:
3
TODO.md
3
TODO.md
@@ -3,14 +3,13 @@
|
|||||||
|
|
||||||
More comments in code! This is getting big enough that I need to re-find my bearings when fixing stuff.
|
More comments in code! This is getting big enough that I need to re-find my bearings when fixing stuff.
|
||||||
|
|
||||||
- [ ] tokenizer
|
- [x] tokenizer that can be ported to newt
|
||||||
- [ ] string interpolation
|
- [ ] string interpolation
|
||||||
|
|
||||||
- [ ] editor - indent newline on let with no in
|
- [ ] editor - indent newline on let with no in
|
||||||
- I've seen this done in vi for Idris, but it seems non-trivial in vscode.
|
- I've seen this done in vi for Idris, but it seems non-trivial in vscode.
|
||||||
- [x] Move on to next decl in case of error
|
- [x] Move on to next decl in case of error
|
||||||
- [x] for parse error, seek to col 0 token and process next decl
|
- [x] for parse error, seek to col 0 token and process next decl
|
||||||
- [ ] record initialization sugar, e.g. `{ x := 1, y := 2 }`
|
|
||||||
- [ ] record update sugar, syntax TBD
|
- [ ] record update sugar, syntax TBD
|
||||||
- I think I'm going to hold off on this for now as it requires the type to elaborate. This ends up at the head of an app, which typically is inferred. We'd need a special case somewhere that infers its argument instead.
|
- I think I'm going to hold off on this for now as it requires the type to elaborate. This ends up at the head of an app, which typically is inferred. We'd need a special case somewhere that infers its argument instead.
|
||||||
- [ ] Change `Ord` to be more like Idris - LT / EQ / GT (and entail equality)
|
- [ ] Change `Ord` to be more like Idris - LT / EQ / GT (and entail equality)
|
||||||
|
|||||||
@@ -984,7 +984,7 @@ showDef : Context -> List String -> Nat -> Val -> M String
|
|||||||
showDef ctx names n v@(VVar _ n' [<]) = if n == n' then pure "" else pure "= \{pprint names !(quote ctx.lvl v)}"
|
showDef ctx names n v@(VVar _ n' [<]) = if n == n' then pure "" else pure "= \{pprint names !(quote ctx.lvl v)}"
|
||||||
showDef ctx names n v = pure "= \{pprint names !(quote ctx.lvl v)}"
|
showDef ctx names n v = pure "= \{pprint names !(quote ctx.lvl v)}"
|
||||||
|
|
||||||
|
-- desugar do
|
||||||
undo : FC -> List DoStmt -> M Raw
|
undo : FC -> List DoStmt -> M Raw
|
||||||
undo prev [] = error prev "do block must end in expression"
|
undo prev [] = error prev "do block must end in expression"
|
||||||
undo prev ((DoExpr fc tm) :: Nil) = pure tm
|
undo prev ((DoExpr fc tm) :: Nil) = pure tm
|
||||||
|
|||||||
@@ -84,22 +84,18 @@ rawTokens
|
|||||||
-- { is singleton except for {{
|
-- { is singleton except for {{
|
||||||
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
||||||
-- need to make this an ident
|
-- need to make this an ident
|
||||||
<|> match (exact ",") (checkKW)
|
<|> match (exact ",") (Tok Ident)
|
||||||
-- for now, our lambda slash is singleton
|
-- for now, our lambda slash is singleton
|
||||||
<|> match (singleton) (Tok Symbol)
|
|
||||||
-- TODO Drop MixFix token type when we support if_then_else_
|
|
||||||
<|> match (exact "_,_" <|> exact "_._") (Tok MixFix)
|
<|> match (exact "_,_" <|> exact "_._") (Tok MixFix)
|
||||||
-- REVIEW - expect non-alpha after?
|
|
||||||
<|> match (opt (exact "-") <+> some digit) (Tok Number)
|
<|> match (opt (exact "-") <+> some digit) (Tok Number)
|
||||||
-- for module names and maybe type constructors
|
|
||||||
<|> match (charLit) (Tok Character . unquoteChar)
|
<|> match (charLit) (Tok Character . unquoteChar)
|
||||||
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
||||||
<|> match (lineComment (exact "--")) (Tok Space)
|
<|> match (lineComment (exact "--")) (Tok Space)
|
||||||
<|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
|
<|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
|
||||||
<|> match (upper <+> many identMore) checkUKW
|
|
||||||
<|> match (btick <+> manyUntil btick any <+> btick) (Tok JSLit . trimJS)
|
<|> match (btick <+> manyUntil btick any <+> btick) (Tok JSLit . trimJS)
|
||||||
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
||||||
-- accept almost everything, but
|
<|> match (singleton) (Tok Symbol)
|
||||||
|
<|> match (upper <+> many identMore) checkUKW
|
||||||
<|> match (some (non (space <|> singleton))) checkKW
|
<|> match (some (non (space <|> singleton))) checkKW
|
||||||
|
|
||||||
notSpace : WithBounds (Token Kind) -> Bool
|
notSpace : WithBounds (Token Kind) -> Bool
|
||||||
|
|||||||
103
src/Lib/Tokenizer2.idr
Normal file
103
src/Lib/Tokenizer2.idr
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
module Lib.Tokenizer2
|
||||||
|
|
||||||
|
-- Working alternate tokenizer, saves about 2k, can be translated to newt
|
||||||
|
|
||||||
|
import Lib.Token
|
||||||
|
import Lib.Common
|
||||||
|
import Data.String
|
||||||
|
|
||||||
|
standalone : List Char
|
||||||
|
standalone = unpack "()\\{}[],.@"
|
||||||
|
|
||||||
|
keywords : List String
|
||||||
|
keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
|
||||||
|
"ptype", "pfunc", "module", "infixl", "infixr", "infix",
|
||||||
|
"∀", "forall", "import", "uses",
|
||||||
|
"class", "instance", "record", "constructor",
|
||||||
|
"if", "then", "else",
|
||||||
|
"$", "λ", "?", "@",
|
||||||
|
"->", "→", ":", "=>", ":=", "=", "<-", "\\", "_", "|"]
|
||||||
|
|
||||||
|
-- This makes a big case tree...
|
||||||
|
tokenise' : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
||||||
|
tokenise' sl sc toks chars = case chars of
|
||||||
|
Nil => Right $ toks <>> []
|
||||||
|
' ' :: cs => tokenise' sl (sc + 1) toks cs
|
||||||
|
'\n' :: cs => tokenise' (sl + 1) 0 toks cs
|
||||||
|
'{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "{{" ) cs
|
||||||
|
'}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "}}" ) cs
|
||||||
|
',' :: cs => tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Ident ",") cs
|
||||||
|
'_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_,_") cs
|
||||||
|
'_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_._") cs
|
||||||
|
'\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) Character (singleton c)) cs
|
||||||
|
'\'' :: '\\' :: c :: '\'' :: cs =>
|
||||||
|
let ch = ifThenElse (c == 'n') '\n' c
|
||||||
|
in tokenise' sl (sc + 4) (toks :< mktok sl (sc + 4) Character (singleton ch)) cs
|
||||||
|
'#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
|
||||||
|
'-' :: '-' :: cs => lineComment sl (sc + 2) toks cs
|
||||||
|
'/' :: '-' :: cs => blockComment sl (sc + 2) toks cs
|
||||||
|
'`' :: cs => doBacktick sl (sc + 1) toks cs [<]
|
||||||
|
'"' :: cs => doQuote sl (sc + 1) toks cs [<]
|
||||||
|
'-' :: c :: cs => if isDigit c
|
||||||
|
then doRest sl (sc + 2) toks cs Number isDigit (Lin :< '-' :< c)
|
||||||
|
else doRest sl (sc + 1) toks (c :: cs) Ident isIdent (Lin :< '-')
|
||||||
|
|
||||||
|
c :: cs => doChar c cs
|
||||||
|
|
||||||
|
where
|
||||||
|
isIdent : Char -> Bool
|
||||||
|
isIdent c = not (isSpace c || elem c standalone)
|
||||||
|
|
||||||
|
doBacktick : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
||||||
|
doBacktick l c toks Nil acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string"
|
||||||
|
doBacktick el ec toks ('`' :: cs) acc =
|
||||||
|
let tok = MkBounded (Tok JSLit (pack $ acc <>> [])) False (MkBounds sl sc el ec) in
|
||||||
|
tokenise' el (ec + 1) (toks :< tok) cs
|
||||||
|
doBacktick l c toks ('\n' :: cs) acc = doBacktick (l + 1) 0 toks cs (acc :< '\n')
|
||||||
|
doBacktick l c toks (ch :: cs) acc = doBacktick l (c + 1) toks cs (acc :< ch)
|
||||||
|
|
||||||
|
|
||||||
|
-- temporary use same token as before
|
||||||
|
mktok : Int -> Int -> Kind -> String -> BTok
|
||||||
|
mktok el ec kind text = let kind = if elem text keywords then Keyword else kind in
|
||||||
|
MkBounded (Tok kind text) False (MkBounds sl sc el ec)
|
||||||
|
|
||||||
|
lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
||||||
|
lineComment line col toks Nil = tokenise' line col toks Nil
|
||||||
|
lineComment line col toks ('\n' :: cs) = tokenise' (line + 1) 0 toks cs
|
||||||
|
lineComment line col toks (c :: cs) = lineComment line (col + 1) toks cs
|
||||||
|
|
||||||
|
blockComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
||||||
|
blockComment line col toks Nil = Left $ E (MkFC "" (line, col)) "EOF in block comment"
|
||||||
|
blockComment line col toks ('-' :: '/' :: cs) = tokenise' line (col + 2) toks cs
|
||||||
|
blockComment line col toks ('\n' :: cs) = blockComment (line + 1) 0 toks cs
|
||||||
|
blockComment line col toks (c :: cs) = blockComment line (col + 1) toks cs
|
||||||
|
|
||||||
|
|
||||||
|
doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok)
|
||||||
|
doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok l c kind (pack $ acc <>> [])) Nil
|
||||||
|
doRest l c toks (ch :: cs) kind pred acc = if pred ch
|
||||||
|
then doRest l (c + 1) toks cs kind pred (acc :< ch)
|
||||||
|
else
|
||||||
|
let kind = if elem '_' acc then MixFix else kind in
|
||||||
|
tokenise' l c (toks :< mktok l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)
|
||||||
|
|
||||||
|
doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
||||||
|
-- should be an error..
|
||||||
|
doQuote line col toks Nil acc = ?missing_end_quote
|
||||||
|
doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n')
|
||||||
|
doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c)
|
||||||
|
doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote
|
||||||
|
doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok line (col + 1) StringKind (pack $ acc <>> [])) cs
|
||||||
|
doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c)
|
||||||
|
|
||||||
|
doChar : Char -> List Char -> Either Error (List BTok)
|
||||||
|
doChar c cs = if elem c standalone
|
||||||
|
then tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Symbol (singleton c)) cs
|
||||||
|
else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
|
||||||
|
doRest sl sc toks (c :: cs) kind isIdent [<]
|
||||||
|
export
|
||||||
|
tokenise : String -> String -> Either Error (List BTok)
|
||||||
|
tokenise fn text = case tokenise' 0 0 Lin (unpack text) of
|
||||||
|
Left (E (MkFC file start) str) => Left (E (MkFC fn start) str)
|
||||||
|
res => res
|
||||||
11
src/Main.idr
11
src/Main.idr
@@ -18,7 +18,7 @@ import Lib.Parser.Impl
|
|||||||
import Lib.Prettier
|
import Lib.Prettier
|
||||||
import Lib.ProcessDecl
|
import Lib.ProcessDecl
|
||||||
import Lib.Token
|
import Lib.Token
|
||||||
import Lib.Tokenizer
|
import Lib.Tokenizer2
|
||||||
import Lib.TopContext
|
import Lib.TopContext
|
||||||
import Lib.Types
|
import Lib.Types
|
||||||
import Lib.Syntax
|
import Lib.Syntax
|
||||||
@@ -152,10 +152,11 @@ processModule importFC base stk name = do
|
|||||||
processFile : String -> M ()
|
processFile : String -> M ()
|
||||||
processFile fn = do
|
processFile fn = do
|
||||||
putStrLn "*** Process \{fn}"
|
putStrLn "*** Process \{fn}"
|
||||||
let parts = splitPath fn
|
let parts = split (== '/') fn
|
||||||
let file = fromMaybe "" $ last' parts
|
let file = last parts
|
||||||
let dir = fromMaybe "./" $ parent fn
|
let dirs = init parts
|
||||||
let (name,ext) = splitFileName (fromMaybe "" $ last' parts)
|
let dir = if dirs == Nil then "." else joinBy "/" dirs
|
||||||
|
let (name,ext) = splitFileName file
|
||||||
putStrLn "\{show dir} \{show name} \{show ext}"
|
putStrLn "\{show dir} \{show name} \{show ext}"
|
||||||
|
|
||||||
-- declare internal primitives
|
-- declare internal primitives
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ hundred : _
|
|||||||
hundred = mul ten ten
|
hundred = mul ten ten
|
||||||
|
|
||||||
-- Leibniz equality
|
-- Leibniz equality
|
||||||
Eq : {A: U} -> A -> A -> U
|
Eq : {A : U} -> A -> A -> U
|
||||||
Eq = \{A} x y => (P : A -> U) -> P x -> P y
|
Eq = \{A} x y => (P : A -> U) -> P x -> P y
|
||||||
|
|
||||||
refl : {A : U} {x : A} -> Eq x x
|
refl : {A : U} {x : A} -> Eq x x
|
||||||
|
|||||||
Reference in New Issue
Block a user