diff --git a/src/Lib/Parser.idr b/src/Lib/Parser.idr index 86f25f8..5fb7a40 100644 --- a/src/Lib/Parser.idr +++ b/src/Lib/Parser.idr @@ -42,6 +42,29 @@ stringLit = do t <- token StringKind pure $ RLit fc (LString (cast t)) + +-- typeExpr is term with arrows. +export typeExpr : Parser Raw +export term : (Parser Raw) + +interp : Parser Raw +interp = token StartInterp *> term <* token EndInterp + + +interpString : Parser Raw +interpString = do + fc <- getPos + ignore $ token StartQuote + part <- term + parts <- many (stringLit <|> interp) + ignore $ token EndQuote + pure $ foldl append part parts + where + append : Raw -> Raw -> Raw + append t u = + let fc = getFC t in + (RApp fc (RApp fc (RVar fc "_++_") t Explicit) u Explicit) + intLit : Parser Raw intLit = do fc <- getPos @@ -56,11 +79,9 @@ charLit = do pure $ RLit fc (LChar $ assert_total $ strIndex v 0) lit : Parser Raw -lit = intLit <|> stringLit <|> charLit +lit = intLit <|> interpString <|> stringLit <|> charLit + --- typeExpr is term with arrows. -export typeExpr : Parser Raw -export term : (Parser Raw) -- helpful when we've got some / many and need FC for each withPos : Parser a -> Parser (FC, a) diff --git a/src/Lib/Token.idr b/src/Lib/Token.idr index 2f7ff85..5437f82 100644 --- a/src/Lib/Token.idr +++ b/src/Lib/Token.idr @@ -42,6 +42,10 @@ data Kind | Semi | RBrace | EOI + | StartQuote + | EndQuote + | StartInterp + | EndInterp export Show Kind where @@ -62,6 +66,10 @@ Show Kind where show StringKind = "String" show JSLit = "JSLit" show Projection = "Projection" + show StartQuote = "StartQuote" + show EndQuote = "EndQuote" + show StartInterp = "StartInterp" + show EndInterp = "EndInterp" export Eq Kind where diff --git a/src/Lib/Tokenizer.idr b/src/Lib/Tokenizer.idr index a33d5b0..86e20c4 100644 --- a/src/Lib/Tokenizer.idr +++ b/src/Lib/Tokenizer.idr @@ -2,6 +2,9 @@ module Lib.Tokenizer -- Working alternate tokenizer, saves about 2k, can be translated to newt +-- Currently this processes a stream of characters, I may switch it to +-- combinators for readability in the future. + import Lib.Token import Lib.Common import Data.String @@ -20,30 +23,78 @@ keywords = ["let", "in", "where", "case", "of", "data", "U", "do", "$", "λ", "?", "@", ".", "->", "→", ":", "=>", ":=", "=", "<-", "\\", "_", "|"] +record TState where + constructor TS + trow : Int + tcol : Int + acc : SnocList BTok + chars : List Char + -- This makes a big case tree... -tokenise' : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok) -tokenise' sl sc toks chars = case chars of - Nil => Right $ toks <>> [] - ' ' :: cs => tokenise' sl (sc + 1) toks cs - '\n' :: cs => tokenise' (sl + 1) 0 toks cs - '{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs - '}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs - ',' :: cs => tokenise' sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs - '_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs - '_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs +rawTokenise : TState -> Either Error TState + +quoteTokenise : TState -> Int -> Int -> SnocList Char -> Either Error TState +quoteTokenise ts@(TS el ec toks chars) sl sc acc = case chars of + '"' :: cs => Right (TS el ec (toks :< stok) chars) + '\\' :: '{' :: cs => do + let tok = MkBounded (Tok StartInterp "\\{") (MkBounds el ec el (ec + 2)) + (TS sl sc toks chars) <- rawTokenise $ TS el (ec + 2) (toks :< stok :< tok) cs + case chars of + '}' :: cs => + let tok = MkBounded (Tok EndInterp "}") (MkBounds el ec el (ec + 1)) + in quoteTokenise (TS el (ec + 1) (toks :< tok) cs) el (ec + 1) [<] + cs => Left $ E (MkFC "" (el, ec)) "Expected '{'" + c :: cs => do + quoteTokenise (TS el (ec + 1) toks cs) sl sc (acc :< c) + Nil => Left $ E (MkFC "" (el, ec)) "Expected '\"' at EOF" + + where + stok : BTok + stok = MkBounded (Tok StringKind (pack $ acc <>> [])) (MkBounds sl sc el ec) + + + +rawTokenise ts@(TS sl sc toks chars) = case chars of + Nil => Right $ ts + ' ' :: cs => rawTokenise (TS sl (sc + 1) toks cs) + '\n' :: cs => rawTokenise (TS (sl + 1) 0 toks cs) + '{' :: '{' :: cs => rawTokenise (TS sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs) + '}' :: '}' :: cs => rawTokenise (TS sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs) + + '"' :: cs => do + let tok = mktok False sl (sc + 1) StartQuote "\"" + (TS sl sc toks chars) <- quoteTokenise (TS sl (sc + 1) (toks :< tok) cs) sl (sc + 1) [<] + case chars of + '"' :: cs => let tok = mktok False sl (sc + 1) EndQuote "\"" in + rawTokenise (TS sl (sc + 1) (toks :< tok) cs) + cs => Left $ E (MkFC "" (sl, sc)) "Expected '\"'" + + + '}' :: cs => Right ts + '{' :: cs => do + let tok = mktok False sl (sc + 1) Symbol "{" + (TS sl sc toks chars) <- rawTokenise (TS sl (sc + 1) (toks :< tok) cs) + case chars of + '}' :: cs => let tok = mktok False sl (sc + 1) Symbol "}" in + rawTokenise (TS sl (sc + 1) (toks :< tok) cs) + cs => Left $ E (MkFC "" (sl, sc)) "Expected '}'" + + ',' :: cs => rawTokenise (TS sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs) + '_' :: ',' :: '_' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs) + '_' :: '.' :: '_' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs) '\'' :: '\\' :: c :: '\'' :: cs => let ch = ifThenElse (c == 'n') '\n' c - in tokenise' sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs - '\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs + in rawTokenise (TS sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs) + '\'' :: c :: '\'' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs) '#' :: cs => ?do_pragma -- we probably want to require at least one alpha? - '-' :: '-' :: cs => lineComment sl (sc + 2) toks cs - '/' :: '-' :: cs => blockComment sl (sc + 2) toks cs - '`' :: cs => doBacktick sl (sc + 1) toks cs [<] - '"' :: cs => doQuote sl (sc + 1) toks cs [<] - '.' :: cs => doRest sl (sc + 1) toks cs Projection isIdent (Lin :< '.') + '-' :: '-' :: cs => lineComment (TS sl (sc + 2) toks cs) + '/' :: '-' :: cs => blockComment (TS sl (sc + 2) toks cs) + '`' :: cs => doBacktick (TS sl (sc + 1) toks cs) [<] + '"' :: cs => doQuote (TS sl (sc + 1) toks cs) [<] + '.' :: cs => doRest (TS sl (sc + 1) toks cs) Projection isIdent (Lin :< '.') '-' :: c :: cs => if isDigit c - then doRest sl (sc + 2) toks cs Number isDigit (Lin :< '-' :< c) - else doRest sl (sc + 1) toks (c :: cs) Ident isIdent (Lin :< '-') + then doRest (TS sl (sc + 2) toks cs) Number isDigit (Lin :< '-' :< c) + else doRest (TS sl (sc + 1) toks (c :: cs)) Ident isIdent (Lin :< '-') c :: cs => doChar c cs @@ -54,13 +105,13 @@ tokenise' sl sc toks chars = case chars of isUIdent : Char -> Bool isUIdent c = isIdent c || c == '.' - doBacktick : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok) - doBacktick l c toks Nil acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string" - doBacktick el ec toks ('`' :: cs) acc = + doBacktick : TState -> SnocList Char -> Either Error TState + doBacktick (TS l c toks Nil) acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string" + doBacktick (TS el ec toks ('`' :: cs)) acc = let tok = MkBounded (Tok JSLit (pack $ acc <>> [])) (MkBounds sl sc el ec) in - tokenise' el (ec + 1) (toks :< tok) cs - doBacktick l c toks ('\n' :: cs) acc = doBacktick (l + 1) 0 toks cs (acc :< '\n') - doBacktick l c toks (ch :: cs) acc = doBacktick l (c + 1) toks cs (acc :< ch) + rawTokenise (TS el (ec + 1) (toks :< tok) cs) + doBacktick (TS l c toks ('\n' :: cs)) acc = doBacktick (TS (l + 1) 0 toks cs) (acc :< '\n') + doBacktick (TS l c toks (ch :: cs)) acc = doBacktick (TS l (c + 1) toks cs) (acc :< ch) -- temporary use same token as before @@ -68,42 +119,45 @@ tokenise' sl sc toks chars = case chars of mktok checkkw el ec kind text = let kind = if checkkw && elem text keywords then Keyword else kind in MkBounded (Tok kind text) (MkBounds sl sc el ec) - lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok) - lineComment line col toks Nil = tokenise' line col toks Nil - lineComment line col toks ('\n' :: cs) = tokenise' (line + 1) 0 toks cs - lineComment line col toks (c :: cs) = lineComment line (col + 1) toks cs + lineComment : TState -> Either Error TState + lineComment (TS line col toks Nil) = rawTokenise (TS line col toks Nil) + lineComment (TS line col toks ('\n' :: cs)) = rawTokenise (TS (line + 1) 0 toks cs) + lineComment (TS line col toks (c :: cs)) = lineComment (TS line (col + 1) toks cs) - blockComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok) - blockComment line col toks Nil = Left $ E (MkFC "" (line, col)) "EOF in block comment" - blockComment line col toks ('-' :: '/' :: cs) = tokenise' line (col + 2) toks cs - blockComment line col toks ('\n' :: cs) = blockComment (line + 1) 0 toks cs - blockComment line col toks (c :: cs) = blockComment line (col + 1) toks cs + blockComment : TState -> Either Error TState + blockComment (TS line col toks Nil) = Left $ E (MkFC "" (line, col)) "EOF in block comment" + blockComment (TS line col toks ('-' :: '/' :: cs)) = rawTokenise (TS line (col + 2) toks cs) + blockComment (TS line col toks ('\n' :: cs)) = blockComment (TS (line + 1) 0 toks cs) + blockComment (TS line col toks (c :: cs)) = blockComment (TS line (col + 1) toks cs) - doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok) - doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil - doRest l c toks (ch :: cs) kind pred acc = if pred ch - then doRest l (c + 1) toks cs kind pred (acc :< ch) + doRest : TState -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error TState + doRest (TS l c toks Nil) kind pred acc = rawTokenise (TS l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil) + doRest (TS l c toks (ch :: cs)) kind pred acc = if pred ch + then doRest (TS l (c + 1) toks cs) kind pred (acc :< ch) else let kind = if elem '_' acc then MixFix else kind in - tokenise' l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs) + rawTokenise (TS l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)) - doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok) + doQuote : TState -> SnocList Char -> Either Error TState -- should be an error.. - doQuote line col toks Nil acc = ?missing_end_quote - doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n') - doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c) - doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote - doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs - doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c) + doQuote (TS line col toks Nil) acc = ?missing_end_quote + doQuote (TS line col toks ('\\' :: 'n' :: cs)) acc = doQuote (TS line (col + 2) toks cs) (acc :< '\n') + doQuote (TS line col toks ('\\' :: c :: cs)) acc = doQuote (TS line (col + 2) toks cs) (acc :< c) + doQuote (TS line col toks ('\n' :: cs)) acc = ?error_newline_in_quote + doQuote (TS line col toks ('"' :: cs)) acc = rawTokenise (TS line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs) + doQuote (TS line col toks (c :: cs)) acc = doQuote (TS line (col + 1) toks cs) (acc :< c) - doChar : Char -> List Char -> Either Error (List BTok) + doChar : Char -> List Char -> Either Error TState doChar c cs = if elem c standalone - then tokenise' sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs + then rawTokenise (TS sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs) else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in - doRest sl sc toks (c :: cs) kind isIdent [<] + doRest (TS sl sc toks (c :: cs)) kind isIdent [<] export tokenise : String -> String -> Either Error (List BTok) -tokenise fn text = case tokenise' 0 0 Lin (unpack text) of - Left (E (MkFC file start) str) => Left (E (MkFC fn start) str) - res => res +tokenise fn text = case rawTokenise (TS 0 0 Lin (unpack text)) of + Right (TS trow tcol acc []) => Right $ acc <>> [] + Right (TS trow tcol acc chars) => Left $ E (MkFC fn (trow, tcol)) "Extra toks" + Left (E (MkFC file start) str) => Left $ E (MkFC fn start) str + Left err => Left err +