string interpolation
This commit is contained in:
@@ -42,6 +42,29 @@ stringLit = do
|
|||||||
t <- token StringKind
|
t <- token StringKind
|
||||||
pure $ RLit fc (LString (cast t))
|
pure $ RLit fc (LString (cast t))
|
||||||
|
|
||||||
|
|
||||||
|
-- typeExpr is term with arrows.
|
||||||
|
export typeExpr : Parser Raw
|
||||||
|
export term : (Parser Raw)
|
||||||
|
|
||||||
|
interp : Parser Raw
|
||||||
|
interp = token StartInterp *> term <* token EndInterp
|
||||||
|
|
||||||
|
|
||||||
|
interpString : Parser Raw
|
||||||
|
interpString = do
|
||||||
|
fc <- getPos
|
||||||
|
ignore $ token StartQuote
|
||||||
|
part <- term
|
||||||
|
parts <- many (stringLit <|> interp)
|
||||||
|
ignore $ token EndQuote
|
||||||
|
pure $ foldl append part parts
|
||||||
|
where
|
||||||
|
append : Raw -> Raw -> Raw
|
||||||
|
append t u =
|
||||||
|
let fc = getFC t in
|
||||||
|
(RApp fc (RApp fc (RVar fc "_++_") t Explicit) u Explicit)
|
||||||
|
|
||||||
intLit : Parser Raw
|
intLit : Parser Raw
|
||||||
intLit = do
|
intLit = do
|
||||||
fc <- getPos
|
fc <- getPos
|
||||||
@@ -56,11 +79,9 @@ charLit = do
|
|||||||
pure $ RLit fc (LChar $ assert_total $ strIndex v 0)
|
pure $ RLit fc (LChar $ assert_total $ strIndex v 0)
|
||||||
|
|
||||||
lit : Parser Raw
|
lit : Parser Raw
|
||||||
lit = intLit <|> stringLit <|> charLit
|
lit = intLit <|> interpString <|> stringLit <|> charLit
|
||||||
|
|
||||||
|
|
||||||
-- typeExpr is term with arrows.
|
|
||||||
export typeExpr : Parser Raw
|
|
||||||
export term : (Parser Raw)
|
|
||||||
|
|
||||||
-- helpful when we've got some / many and need FC for each
|
-- helpful when we've got some / many and need FC for each
|
||||||
withPos : Parser a -> Parser (FC, a)
|
withPos : Parser a -> Parser (FC, a)
|
||||||
|
|||||||
@@ -42,6 +42,10 @@ data Kind
|
|||||||
| Semi
|
| Semi
|
||||||
| RBrace
|
| RBrace
|
||||||
| EOI
|
| EOI
|
||||||
|
| StartQuote
|
||||||
|
| EndQuote
|
||||||
|
| StartInterp
|
||||||
|
| EndInterp
|
||||||
|
|
||||||
export
|
export
|
||||||
Show Kind where
|
Show Kind where
|
||||||
@@ -62,6 +66,10 @@ Show Kind where
|
|||||||
show StringKind = "String"
|
show StringKind = "String"
|
||||||
show JSLit = "JSLit"
|
show JSLit = "JSLit"
|
||||||
show Projection = "Projection"
|
show Projection = "Projection"
|
||||||
|
show StartQuote = "StartQuote"
|
||||||
|
show EndQuote = "EndQuote"
|
||||||
|
show StartInterp = "StartInterp"
|
||||||
|
show EndInterp = "EndInterp"
|
||||||
|
|
||||||
export
|
export
|
||||||
Eq Kind where
|
Eq Kind where
|
||||||
|
|||||||
@@ -2,6 +2,9 @@ module Lib.Tokenizer
|
|||||||
|
|
||||||
-- Working alternate tokenizer, saves about 2k, can be translated to newt
|
-- Working alternate tokenizer, saves about 2k, can be translated to newt
|
||||||
|
|
||||||
|
-- Currently this processes a stream of characters, I may switch it to
|
||||||
|
-- combinators for readability in the future.
|
||||||
|
|
||||||
import Lib.Token
|
import Lib.Token
|
||||||
import Lib.Common
|
import Lib.Common
|
||||||
import Data.String
|
import Data.String
|
||||||
@@ -20,30 +23,78 @@ keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
|
|||||||
"$", "λ", "?", "@", ".",
|
"$", "λ", "?", "@", ".",
|
||||||
"->", "→", ":", "=>", ":=", "=", "<-", "\\", "_", "|"]
|
"->", "→", ":", "=>", ":=", "=", "<-", "\\", "_", "|"]
|
||||||
|
|
||||||
|
record TState where
|
||||||
|
constructor TS
|
||||||
|
trow : Int
|
||||||
|
tcol : Int
|
||||||
|
acc : SnocList BTok
|
||||||
|
chars : List Char
|
||||||
|
|
||||||
-- This makes a big case tree...
|
-- This makes a big case tree...
|
||||||
tokenise' : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
rawTokenise : TState -> Either Error TState
|
||||||
tokenise' sl sc toks chars = case chars of
|
|
||||||
Nil => Right $ toks <>> []
|
quoteTokenise : TState -> Int -> Int -> SnocList Char -> Either Error TState
|
||||||
' ' :: cs => tokenise' sl (sc + 1) toks cs
|
quoteTokenise ts@(TS el ec toks chars) sl sc acc = case chars of
|
||||||
'\n' :: cs => tokenise' (sl + 1) 0 toks cs
|
'"' :: cs => Right (TS el ec (toks :< stok) chars)
|
||||||
'{' :: '{' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs
|
'\\' :: '{' :: cs => do
|
||||||
'}' :: '}' :: cs => tokenise' sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs
|
let tok = MkBounded (Tok StartInterp "\\{") (MkBounds el ec el (ec + 2))
|
||||||
',' :: cs => tokenise' sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs
|
(TS sl sc toks chars) <- rawTokenise $ TS el (ec + 2) (toks :< stok :< tok) cs
|
||||||
'_' :: ',' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs
|
case chars of
|
||||||
'_' :: '.' :: '_' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs
|
'}' :: cs =>
|
||||||
|
let tok = MkBounded (Tok EndInterp "}") (MkBounds el ec el (ec + 1))
|
||||||
|
in quoteTokenise (TS el (ec + 1) (toks :< tok) cs) el (ec + 1) [<]
|
||||||
|
cs => Left $ E (MkFC "" (el, ec)) "Expected '{'"
|
||||||
|
c :: cs => do
|
||||||
|
quoteTokenise (TS el (ec + 1) toks cs) sl sc (acc :< c)
|
||||||
|
Nil => Left $ E (MkFC "" (el, ec)) "Expected '\"' at EOF"
|
||||||
|
|
||||||
|
where
|
||||||
|
stok : BTok
|
||||||
|
stok = MkBounded (Tok StringKind (pack $ acc <>> [])) (MkBounds sl sc el ec)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
rawTokenise ts@(TS sl sc toks chars) = case chars of
|
||||||
|
Nil => Right $ ts
|
||||||
|
' ' :: cs => rawTokenise (TS sl (sc + 1) toks cs)
|
||||||
|
'\n' :: cs => rawTokenise (TS (sl + 1) 0 toks cs)
|
||||||
|
'{' :: '{' :: cs => rawTokenise (TS sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "{{" ) cs)
|
||||||
|
'}' :: '}' :: cs => rawTokenise (TS sl (sc + 2) (toks :< mktok False sl (sc + 2) Keyword "}}" ) cs)
|
||||||
|
|
||||||
|
'"' :: cs => do
|
||||||
|
let tok = mktok False sl (sc + 1) StartQuote "\""
|
||||||
|
(TS sl sc toks chars) <- quoteTokenise (TS sl (sc + 1) (toks :< tok) cs) sl (sc + 1) [<]
|
||||||
|
case chars of
|
||||||
|
'"' :: cs => let tok = mktok False sl (sc + 1) EndQuote "\"" in
|
||||||
|
rawTokenise (TS sl (sc + 1) (toks :< tok) cs)
|
||||||
|
cs => Left $ E (MkFC "" (sl, sc)) "Expected '\"'"
|
||||||
|
|
||||||
|
|
||||||
|
'}' :: cs => Right ts
|
||||||
|
'{' :: cs => do
|
||||||
|
let tok = mktok False sl (sc + 1) Symbol "{"
|
||||||
|
(TS sl sc toks chars) <- rawTokenise (TS sl (sc + 1) (toks :< tok) cs)
|
||||||
|
case chars of
|
||||||
|
'}' :: cs => let tok = mktok False sl (sc + 1) Symbol "}" in
|
||||||
|
rawTokenise (TS sl (sc + 1) (toks :< tok) cs)
|
||||||
|
cs => Left $ E (MkFC "" (sl, sc)) "Expected '}'"
|
||||||
|
|
||||||
|
',' :: cs => rawTokenise (TS sl (sc + 1) (toks :< mktok False sl (sc + 1) Ident ",") cs)
|
||||||
|
'_' :: ',' :: '_' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_,_") cs)
|
||||||
|
'_' :: '.' :: '_' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) MixFix "_._") cs)
|
||||||
'\'' :: '\\' :: c :: '\'' :: cs =>
|
'\'' :: '\\' :: c :: '\'' :: cs =>
|
||||||
let ch = ifThenElse (c == 'n') '\n' c
|
let ch = ifThenElse (c == 'n') '\n' c
|
||||||
in tokenise' sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs
|
in rawTokenise (TS sl (sc + 4) (toks :< mktok False sl (sc + 4) Character (singleton ch)) cs)
|
||||||
'\'' :: c :: '\'' :: cs => tokenise' sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs
|
'\'' :: c :: '\'' :: cs => rawTokenise (TS sl (sc + 3) (toks :< mktok False sl (sc + 3) Character (singleton c)) cs)
|
||||||
'#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
|
'#' :: cs => ?do_pragma -- we probably want to require at least one alpha?
|
||||||
'-' :: '-' :: cs => lineComment sl (sc + 2) toks cs
|
'-' :: '-' :: cs => lineComment (TS sl (sc + 2) toks cs)
|
||||||
'/' :: '-' :: cs => blockComment sl (sc + 2) toks cs
|
'/' :: '-' :: cs => blockComment (TS sl (sc + 2) toks cs)
|
||||||
'`' :: cs => doBacktick sl (sc + 1) toks cs [<]
|
'`' :: cs => doBacktick (TS sl (sc + 1) toks cs) [<]
|
||||||
'"' :: cs => doQuote sl (sc + 1) toks cs [<]
|
'"' :: cs => doQuote (TS sl (sc + 1) toks cs) [<]
|
||||||
'.' :: cs => doRest sl (sc + 1) toks cs Projection isIdent (Lin :< '.')
|
'.' :: cs => doRest (TS sl (sc + 1) toks cs) Projection isIdent (Lin :< '.')
|
||||||
'-' :: c :: cs => if isDigit c
|
'-' :: c :: cs => if isDigit c
|
||||||
then doRest sl (sc + 2) toks cs Number isDigit (Lin :< '-' :< c)
|
then doRest (TS sl (sc + 2) toks cs) Number isDigit (Lin :< '-' :< c)
|
||||||
else doRest sl (sc + 1) toks (c :: cs) Ident isIdent (Lin :< '-')
|
else doRest (TS sl (sc + 1) toks (c :: cs)) Ident isIdent (Lin :< '-')
|
||||||
|
|
||||||
c :: cs => doChar c cs
|
c :: cs => doChar c cs
|
||||||
|
|
||||||
@@ -54,13 +105,13 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
isUIdent : Char -> Bool
|
isUIdent : Char -> Bool
|
||||||
isUIdent c = isIdent c || c == '.'
|
isUIdent c = isIdent c || c == '.'
|
||||||
|
|
||||||
doBacktick : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
doBacktick : TState -> SnocList Char -> Either Error TState
|
||||||
doBacktick l c toks Nil acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string"
|
doBacktick (TS l c toks Nil) acc = Left $ E (MkFC "" (l,c)) "EOF in backtick string"
|
||||||
doBacktick el ec toks ('`' :: cs) acc =
|
doBacktick (TS el ec toks ('`' :: cs)) acc =
|
||||||
let tok = MkBounded (Tok JSLit (pack $ acc <>> [])) (MkBounds sl sc el ec) in
|
let tok = MkBounded (Tok JSLit (pack $ acc <>> [])) (MkBounds sl sc el ec) in
|
||||||
tokenise' el (ec + 1) (toks :< tok) cs
|
rawTokenise (TS el (ec + 1) (toks :< tok) cs)
|
||||||
doBacktick l c toks ('\n' :: cs) acc = doBacktick (l + 1) 0 toks cs (acc :< '\n')
|
doBacktick (TS l c toks ('\n' :: cs)) acc = doBacktick (TS (l + 1) 0 toks cs) (acc :< '\n')
|
||||||
doBacktick l c toks (ch :: cs) acc = doBacktick l (c + 1) toks cs (acc :< ch)
|
doBacktick (TS l c toks (ch :: cs)) acc = doBacktick (TS l (c + 1) toks cs) (acc :< ch)
|
||||||
|
|
||||||
|
|
||||||
-- temporary use same token as before
|
-- temporary use same token as before
|
||||||
@@ -68,42 +119,45 @@ tokenise' sl sc toks chars = case chars of
|
|||||||
mktok checkkw el ec kind text = let kind = if checkkw && elem text keywords then Keyword else kind in
|
mktok checkkw el ec kind text = let kind = if checkkw && elem text keywords then Keyword else kind in
|
||||||
MkBounded (Tok kind text) (MkBounds sl sc el ec)
|
MkBounded (Tok kind text) (MkBounds sl sc el ec)
|
||||||
|
|
||||||
lineComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
lineComment : TState -> Either Error TState
|
||||||
lineComment line col toks Nil = tokenise' line col toks Nil
|
lineComment (TS line col toks Nil) = rawTokenise (TS line col toks Nil)
|
||||||
lineComment line col toks ('\n' :: cs) = tokenise' (line + 1) 0 toks cs
|
lineComment (TS line col toks ('\n' :: cs)) = rawTokenise (TS (line + 1) 0 toks cs)
|
||||||
lineComment line col toks (c :: cs) = lineComment line (col + 1) toks cs
|
lineComment (TS line col toks (c :: cs)) = lineComment (TS line (col + 1) toks cs)
|
||||||
|
|
||||||
blockComment : Int -> Int -> SnocList BTok -> List Char -> Either Error (List BTok)
|
blockComment : TState -> Either Error TState
|
||||||
blockComment line col toks Nil = Left $ E (MkFC "" (line, col)) "EOF in block comment"
|
blockComment (TS line col toks Nil) = Left $ E (MkFC "" (line, col)) "EOF in block comment"
|
||||||
blockComment line col toks ('-' :: '/' :: cs) = tokenise' line (col + 2) toks cs
|
blockComment (TS line col toks ('-' :: '/' :: cs)) = rawTokenise (TS line (col + 2) toks cs)
|
||||||
blockComment line col toks ('\n' :: cs) = blockComment (line + 1) 0 toks cs
|
blockComment (TS line col toks ('\n' :: cs)) = blockComment (TS (line + 1) 0 toks cs)
|
||||||
blockComment line col toks (c :: cs) = blockComment line (col + 1) toks cs
|
blockComment (TS line col toks (c :: cs)) = blockComment (TS line (col + 1) toks cs)
|
||||||
|
|
||||||
doRest : Int -> Int -> SnocList BTok -> List Char -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error (List BTok)
|
doRest : TState -> Kind -> (Char -> Bool) -> SnocList Char -> Either Error TState
|
||||||
doRest l c toks Nil kind pred acc = tokenise' l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil
|
doRest (TS l c toks Nil) kind pred acc = rawTokenise (TS l c (toks :< mktok True l c kind (pack $ acc <>> [])) Nil)
|
||||||
doRest l c toks (ch :: cs) kind pred acc = if pred ch
|
doRest (TS l c toks (ch :: cs)) kind pred acc = if pred ch
|
||||||
then doRest l (c + 1) toks cs kind pred (acc :< ch)
|
then doRest (TS l (c + 1) toks cs) kind pred (acc :< ch)
|
||||||
else
|
else
|
||||||
let kind = if elem '_' acc then MixFix else kind in
|
let kind = if elem '_' acc then MixFix else kind in
|
||||||
tokenise' l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs)
|
rawTokenise (TS l c (toks :< mktok True l (c - 1) kind (pack $ acc <>> [])) (ch :: cs))
|
||||||
|
|
||||||
doQuote : Int -> Int -> SnocList BTok -> List Char -> SnocList Char -> Either Error (List BTok)
|
doQuote : TState -> SnocList Char -> Either Error TState
|
||||||
-- should be an error..
|
-- should be an error..
|
||||||
doQuote line col toks Nil acc = ?missing_end_quote
|
doQuote (TS line col toks Nil) acc = ?missing_end_quote
|
||||||
doQuote line col toks ('\\' :: 'n' :: cs) acc = doQuote line (col + 2) toks cs (acc :< '\n')
|
doQuote (TS line col toks ('\\' :: 'n' :: cs)) acc = doQuote (TS line (col + 2) toks cs) (acc :< '\n')
|
||||||
doQuote line col toks ('\\' :: c :: cs) acc = doQuote line (col + 2) toks cs (acc :< c)
|
doQuote (TS line col toks ('\\' :: c :: cs)) acc = doQuote (TS line (col + 2) toks cs) (acc :< c)
|
||||||
doQuote line col toks ('\n' :: cs) acc = ?error_newline_in_quote
|
doQuote (TS line col toks ('\n' :: cs)) acc = ?error_newline_in_quote
|
||||||
doQuote line col toks ('"' :: cs) acc = tokenise' line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs
|
doQuote (TS line col toks ('"' :: cs)) acc = rawTokenise (TS line (col + 1) (toks :< mktok False line (col + 1) StringKind (pack $ acc <>> [])) cs)
|
||||||
doQuote line col toks (c :: cs) acc = doQuote line (col + 1) toks cs (acc :< c)
|
doQuote (TS line col toks (c :: cs)) acc = doQuote (TS line (col + 1) toks cs) (acc :< c)
|
||||||
|
|
||||||
doChar : Char -> List Char -> Either Error (List BTok)
|
doChar : Char -> List Char -> Either Error TState
|
||||||
doChar c cs = if elem c standalone
|
doChar c cs = if elem c standalone
|
||||||
then tokenise' sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs
|
then rawTokenise (TS sl (sc + 1) (toks :< mktok True sl (sc + 1) Symbol (singleton c)) cs)
|
||||||
else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
|
else let kind = if isDigit c then Number else if isUpper c then UIdent else Ident in
|
||||||
doRest sl sc toks (c :: cs) kind isIdent [<]
|
doRest (TS sl sc toks (c :: cs)) kind isIdent [<]
|
||||||
|
|
||||||
export
|
export
|
||||||
tokenise : String -> String -> Either Error (List BTok)
|
tokenise : String -> String -> Either Error (List BTok)
|
||||||
tokenise fn text = case tokenise' 0 0 Lin (unpack text) of
|
tokenise fn text = case rawTokenise (TS 0 0 Lin (unpack text)) of
|
||||||
Left (E (MkFC file start) str) => Left (E (MkFC fn start) str)
|
Right (TS trow tcol acc []) => Right $ acc <>> []
|
||||||
res => res
|
Right (TS trow tcol acc chars) => Left $ E (MkFC fn (trow, tcol)) "Extra toks"
|
||||||
|
Left (E (MkFC file start) str) => Left $ E (MkFC fn start) str
|
||||||
|
Left err => Left err
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user