diff --git a/TODO.md b/TODO.md index 98d3ed8..472d7f7 100644 --- a/TODO.md +++ b/TODO.md @@ -4,10 +4,7 @@ More comments in code! This is getting big enough that I need to re-find my bearings when fixing stuff. - [x] tokenizer that can be ported to newt -- [ ] string interpolation - -- [ ] editor - indent newline on let with no in - - I've seen this done in vi for Idris, but it seems non-trivial in vscode. +- [ ] string interpolation? - [x] Move on to next decl in case of error - [x] for parse error, seek to col 0 token and process next decl - [ ] record update sugar, syntax TBD diff --git a/src/Lib/Tokenizer2.idr b/src/Lib/Tokenizer2.idr index 663c61a..935d6a7 100644 --- a/src/Lib/Tokenizer2.idr +++ b/src/Lib/Tokenizer2.idr @@ -24,15 +24,15 @@ tokenise' sl sc toks chars = case chars of Nil => Right $ toks <>> [] ' ' :: cs => tokenise' sl (sc + 1) toks cs '\n' :: cs => tokenise' (sl + 1) 0 toks cs - '{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "{{" ) cs - '}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok sl (sc + 2) Keyword "}}" ) cs - ',' :: cs => tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Ident ",") cs - '_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_,_") cs - '_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) MixFix "_._") cs - '\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok sl (sc + 3) Character (singleton c)) cs + '{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs + '}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs + ',' :: cs => tokenise' sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs + '_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs + '_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs '\'' :: '\\' :: c :: '\'' :: cs => let ch = ifThenElse (c == 'n') '\n' c - in tokenise' sl (sc + 4) (toks :< mktok sl (sc + 4) Character (singleton ch)) cs + in tokenise' sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs + '\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs '#' :: cs => ?do_pragma -- we probably want to require at least one alpha? '-' :: '-' :: cs => lineComment sl (sc + 2) toks cs '/' :: '-' :: cs => blockComment sl (sc + 2) toks cs @@ -58,8 +58,8 @@ tokenise' sl sc toks chars = case chars of -- temporary use same token as before - mktok : Int -> Int -> Kind -> String -> BTok - mktok el ec kind text = let kind = if elem text keywords then Keyword else kind in + mktok : Bool -> Int -> Int -> Kind -> String -> BTok + mktok checkkw el ec kind text = let kind = if checkkw && elem text keywords then Keyword else kind in MkBounded (Tok kind text) False (MkBounds sl sc el ec) lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok) @@ -75,12 +75,12 @@ tokenise' sl sc toks chars = case chars of doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok) - doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok l c kind (pack $ acc <>> [])) Nil + doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil doRest l c toks (ch :: cs) kind pred acc = if pred ch then doRest l (c + 1) toks cs kind pred (acc :< ch) else let kind = if elem '_' acc then MixFix else kind in - tokenise' l c (toks :< mktok l (c - 1) kind (pack $ acc <>> [])) (ch :: cs) + tokenise' l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs) doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok) -- should be an error.. @@ -88,14 +88,15 @@ tokenise' sl sc toks chars = case chars of doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n') doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c) doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote - doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok line (col + 1) StringKind (pack $ acc <>> [])) cs + doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c) doChar : Char -> List Char -> Either Error (List BTok) doChar c cs = if elem c standalone - then tokenise' sl (sc + 1) (toks :< mktok sl (sc + 1) Symbol (singleton c)) cs + then tokenise' sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in doRest sl sc toks (c :: cs) kind isIdent [<] + export tokenise : String -> String -> Either Error (List BTok) tokenise fn text = case tokenise' 0 0 Lin (unpack text) of