diff --git a/TODO.md b/TODO.md
index ca1777c..98d3ed8 100644
--- a/TODO.md
+++ b/TODO.md
@@ -3,14 +3,13 @@
 More comments in code!
 This is getting big enough that I need to re-find my bearings when fixing stuff.

-- [ ] tokenizer
+- [x] tokenizer that can be ported to newt
 - [ ] string interpolation
 - [ ] editor
   - indent newline on let with no in
     - I've seen this done in vi for Idris, but it seems non-trivial in vscode.
 - [x] Move on to next decl in case of error
   - [x] for parse error, seek to col 0 token and process next decl
-- [ ] record initialization sugar, e.g. `{ x := 1, y := 2 }`
 - [ ] record update sugar, syntax TBD
   - I think I'm going to hold off on this for now as it requires the type to elaborate. This ends up at the head of an app, which typically is inferred. We'd need a special case somewhere that infers its argument instead.
 - [ ] Change `Ord` to be more like Idris - LT / EQ / GT (and entail equality)
diff --git a/src/Lib/Elab.idr b/src/Lib/Elab.idr
index 6bd86ce..e719056 100644
--- a/src/Lib/Elab.idr
+++ b/src/Lib/Elab.idr
@@ -984,7 +984,7 @@ showDef : Context -> List String -> Nat -> Val -> M String
 showDef ctx names n v@(VVar _ n' [<]) =
   if n == n' then pure "" else pure "= \{pprint names !(quote ctx.lvl v)}"
 showDef ctx names n v = pure "= \{pprint names !(quote ctx.lvl v)}"
-
+-- desugar do
 undo : FC -> List DoStmt -> M Raw
 undo prev [] = error prev "do block must end in expression"
 undo prev ((DoExpr fc tm) :: Nil) = pure tm
diff --git a/src/Lib/Tokenizer.idr b/src/Lib/Tokenizer.idr
index 11c3a50..207c096 100644
--- a/src/Lib/Tokenizer.idr
+++ b/src/Lib/Tokenizer.idr
@@ -84,22 +84,18 @@ rawTokens
   -- { is singleton except for {{
   <|> match (exact "{{" <|> exact "}}") (Tok Keyword)
   -- need to make this an ident
-  <|> match (exact ",") (checkKW)
+  <|> match (exact ",") (Tok Ident)
   -- for now, our lambda slash is singleton
-  <|> match (singleton) (Tok Symbol)
-  -- TODO Drop MixFix token type when we support if_then_else_
   <|> match (exact "_,_" <|> exact "_._") (Tok MixFix)
-  -- REVIEW - expect non-alpha after?
   <|> match (opt (exact "-") <+> some digit) (Tok Number)
-  -- for module names and maybe type constructors
   <|> match (charLit) (Tok Character . unquoteChar)
   <|> match (is '#' <+> many alpha) (Tok Pragma)
   <|> match (lineComment (exact "--")) (Tok Space)
   <|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
-  <|> match (upper <+> many identMore) checkUKW
   <|> match (btick <+> manyUntil btick any <+> btick) (Tok JSLit . trimJS)
   <|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
-  -- accept almost everything, but
+  <|> match (singleton) (Tok Symbol)
+  <|> match (upper <+> many identMore) checkUKW
   <|> match (some (non (space <|> singleton))) checkKW

 notSpace : WithBounds (Token Kind) -> Bool
diff --git a/src/Lib/Tokenizer2.idr b/src/Lib/Tokenizer2.idr
new file mode 100644
index 0000000..663c61a
--- /dev/null
+++ b/src/Lib/Tokenizer2.idr
@@ -0,0 +1,103 @@
+module Lib.Tokenizer2
+
+-- Working alternate tokenizer, saves about 2k, can be translated to newt
+
+import Lib.Token
+import Lib.Common
+import Data.String
+
+standalone : List Char
+standalone = unpack "()\\{}[],.@"
+
+keywords : List String
+keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
+            "ptype", "pfunc", "module", "infixl", "infixr", "infix",
+            "∀", "forall", "import", "uses",
+            "class", "instance", "record", "constructor",
+            "if", "then", "else",
+            "$", "λ", "?", "@",
+            "->", "→", ":", "=>", ":=", "=", "<-", "\\", "_", "|"]
+
+-- This makes a big case tree...
+tokenise' : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
+tokenise' sl sc toks chars = case chars of
+  Nil => Right $ toks <>> []
+  ' ' :: cs => tokenise' sl (sc + 1) toks cs
+  '\n' :: cs => tokenise' (sl + 1) 0 toks cs
+  '{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "{{" ) cs
+  '}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "}}" ) cs
+  ',' :: cs => tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Ident ",") cs
+  '_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_,_") cs
+  '_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_._") cs
+  '\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) Character (singleton c)) cs
+  '\'' :: '\\' :: c :: '\'' :: cs =>
+    let ch = ifThenElse (c == 'n') '\n' c
+    in tokenise' sl (sc + 4) (toks :< mktok sl (sc + 4) Character (singleton ch)) cs
+  '#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
+  '-' :: '-' :: cs => lineComment sl (sc + 2) toks cs
+  '/' :: '-' :: cs => blockComment sl (sc + 2) toks cs
+  '`' :: cs => doBacktick sl (sc + 1) toks cs [<]
+  '"' :: cs => doQuote sl (sc + 1) toks cs [<]
+  '-' :: c :: cs => if isDigit c
+      then doRest sl (sc + 2) toks cs Number isDigit (Lin :< '-' :< c)
+      else doRest sl (sc + 1) toks (c :: cs) Ident isIdent (Lin :< '-')
+
+  c :: cs => doChar c cs
+
+  where
+    isIdent : Char -> Bool
+    isIdent c = not (isSpace c || elem c standalone)
+
+    doBacktick : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
+    doBacktick l c toks Nil acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string"
+    doBacktick el ec toks ('`' :: cs) acc =
+      let tok = MkBounded (Tok JSLit (pack $ acc <>> [])) False (MkBounds sl sc el ec) in
+      tokenise' el (ec + 1) (toks :< tok) cs
+    doBacktick l c toks ('\n' :: cs) acc = doBacktick (l + 1) 0 toks cs (acc :< '\n')
+    doBacktick l c toks (ch :: cs) acc = doBacktick l (c + 1) toks cs (acc :< ch)
+
+
+    -- temporary use same token as before
+    mktok : Int -> Int -> Kind -> String -> BTok
+    mktok el ec kind text = let kind = if elem text keywords then Keyword else kind in
+      MkBounded (Tok kind text) False (MkBounds sl sc el ec)
+
+    lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
+    lineComment line col toks Nil = tokenise' line col toks Nil
+    lineComment line col toks ('\n' :: cs) = tokenise' (line + 1) 0 toks cs
+    lineComment line col toks (c :: cs) = lineComment line (col + 1) toks cs
+
+    blockComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
+    blockComment line col toks Nil = Left $ E (MkFC "" (line, col)) "EOF in block comment"
+    blockComment line col toks ('-' :: '/' :: cs) = tokenise' line (col + 2) toks cs
+    blockComment line col toks ('\n' :: cs) = blockComment (line + 1) 0 toks cs
+    blockComment line col toks (c :: cs) = blockComment line (col + 1) toks cs
+
+
+    doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok)
+    doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok l c kind (pack $ acc <>> [])) Nil
+    doRest l c toks (ch :: cs) kind pred acc = if pred ch
+      then doRest l (c + 1) toks cs kind pred (acc :< ch)
+      else
+        let kind = if elem '_' acc then MixFix else kind in
+        tokenise' l c (toks :< mktok l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)
+
+    doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
+    -- should be an error..
+    doQuote line col toks Nil acc = ?missing_end_quote
+    doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n')
+    doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c)
+    doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote
+    doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok line (col + 1) StringKind (pack $ acc <>> [])) cs
+    doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c)
+
+    doChar : Char -> List Char -> Either Error (List BTok)
+    doChar c cs = if elem c standalone
+      then tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Symbol (singleton c)) cs
+      else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
+        doRest sl sc toks (c :: cs) kind isIdent [<]
+export
+tokenise : String -> String -> Either Error (List BTok)
+tokenise fn text = case tokenise' 0 0 Lin (unpack text) of
+  Left (E (MkFC file start) str) => Left (E (MkFC fn start) str)
+  res => res
diff --git a/src/Main.idr b/src/Main.idr
index a392e0e..455907b 100644
--- a/src/Main.idr
+++ b/src/Main.idr
@@ -18,7 +18,7 @@ import Lib.Parser.Impl
 import Lib.Prettier
 import Lib.ProcessDecl
 import Lib.Token
-import Lib.Tokenizer
+import Lib.Tokenizer2
 import Lib.TopContext
 import Lib.Types
 import Lib.Syntax
@@ -152,10 +152,11 @@ processModule importFC base stk name = do
 processFile : String -> M ()
 processFile fn = do
   putStrLn "*** Process \{fn}"
-  let parts = splitPath fn
-  let file = fromMaybe "" $ last' parts
-  let dir = fromMaybe "./" $ parent fn
-  let (name,ext) = splitFileName (fromMaybe "" $ last' parts)
+  let parts = split (== '/') fn
+  let file = last parts
+  let dirs = init parts
+  let dir = if dirs == Nil then "." else joinBy "/" dirs
+  let (name,ext) = splitFileName file
   putStrLn "\{show dir} \{show name} \{show ext}"

   -- declare internal primitives
diff --git a/tests/Zoo4eg.newt b/tests/Zoo4eg.newt
index bc9d41a..7f521df 100644
--- a/tests/Zoo4eg.newt
+++ b/tests/Zoo4eg.newt
@@ -80,7 +80,7 @@ hundred : _
 hundred = mul ten ten

 -- Leibniz equality
-Eq : {A: U} -> A -> A -> U
+Eq : {A : U} -> A -> A -> U
 Eq = \{A} x y => (P : A -> U) -> P x -> P y

 refl : {A : U} {x : A} -> Eq x x
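
The three holes left in Tokenizer2.idr (?do_pragma, ?missing_end_quote, ?error_newline_in_quote) could be discharged with the helpers the file already defines. A minimal sketch, not part of the commit: it assumes isAlpha from the Idris prelude, reuses doRest for the pragma case, and copies the error shape used by doBacktick and blockComment; the error messages are illustrative.

    -- hypothetical fill for ?do_pragma: consume '#' plus any alphabetic tail,
    -- like the old `is '#' <+> many alpha` rule (still does not require an alpha)
    '#' :: cs => doRest sl (sc + 1) toks cs Pragma isAlpha (Lin :< '#')

    -- hypothetical fills for ?missing_end_quote / ?error_newline_in_quote,
    -- mirroring the EOF error already reported by doBacktick
    doQuote line col toks Nil acc = Left $ E (MkFC "" (line, col)) "EOF in string literal"
    doQuote line col toks ('\n' :: cs) acc = Left $ E (MkFC "" (line, col)) "newline in string literal"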