Files
newt/port/Lib/Parser.newt
2025-01-06 16:28:40 -08:00

667 lines
17 KiB
Agda
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
module Lib.Parser
-- NOW Still working on this.
import Data.String
import Lib.Parser.Impl
import Lib.Syntax
import Lib.Token
import Lib.Types
lazy : a. (Unit Parser a) Parser a
lazy pa = pa MkUnit
ident : Parser String
ident = token Ident <|> token MixFix
uident : Parser String
uident = token UIdent
parenWrap : a. Parser a -> Parser a
parenWrap pa = do
symbol "("
t <- pa
symbol ")"
pure t
braces : a. Parser a -> Parser a
braces pa = do
symbol "{"
t <- pa
symbol "}"
pure t
dbraces : a. Parser a -> Parser a
dbraces pa = do
symbol "{{"
t <- pa
symbol "}}"
pure t
optional : a. Parser a -> Parser (Maybe a)
optional pa = Just <$> pa <|> pure Nothing
stringLit : Parser Raw
stringLit = do
fc <- getPos
t <- token StringKind
pure $ RLit fc (LString t)
-- typeExpr is term with arrows.
typeExpr : Parser Raw
term : (Parser Raw)
interp : Parser Raw
interp = do
token StartInterp
tm <- term
token EndInterp
pure tm
interpString : Parser Raw
interpString = do
-- fc <- getPos
ignore $ token StartQuote
part <- term
parts <- many (stringLit <|> interp)
ignore $ token EndQuote
pure $ foldl append part parts
where
append : Raw -> Raw -> Raw
append t u =
let fc = getFC t in
(RApp (getFC t) (RApp fc (RVar fc "_++_") t Explicit) u Explicit)
intLit : Parser Raw
intLit = do
fc <- getPos
t <- token Number
pure $ RLit fc (LInt (stringToInt t))
charLit : Parser Raw
charLit = do
fc <- getPos
v <- token Character
pure $ RLit fc (LChar $ strIndex v 0)
lit : Parser Raw
lit = intLit <|> interpString <|> stringLit <|> charLit
-- helpful when we've got some / many and need FC for each
addPos : a. Parser a -> Parser (FC × a)
addPos pa = _,_ <$> getPos <*> pa
asAtom : Parser Raw
asAtom = do
fc <- getPos
nm <- ident
asPat <- optional $ keyword "@" *> parenWrap typeExpr
case asPat of
Just exp => pure $ RAs fc nm exp
Nothing => pure $ RVar fc nm
-- the inside of Raw
atom : Parser Raw
atom = do
pure MkUnit
RU <$> getPos <* keyword "U"
-- <|> RVar <$> getPos <*> ident
<|> asAtom
<|> RVar <$> getPos <*> uident
<|> RVar <$> getPos <*> token Projection
<|> lit
<|> RImplicit <$> getPos <* keyword "_"
<|> RHole <$> getPos <* keyword "?"
<|> parenWrap typeExpr
-- Argument to a Spine
pArg : Parser (Icit × FC × Raw)
pArg = do
fc <- getPos
(\x => Explicit, fc, x) <$> atom
<|> (\x => Implicit, fc, x) <$> braces typeExpr
<|> (\x => Auto, fc, x) <$> dbraces typeExpr
AppSpine : U
AppSpine = List (Icit × FC × Raw)
pratt : Operators -> Int -> String -> Raw -> AppSpine -> Parser (Raw × AppSpine)
pratt ops prec stop left spine = do
(left, spine) <- runPrefix stop left spine
let (left, spine) = projectHead left spine
let spine = runProject spine
case spine of
Nil => pure (left, Nil)
((Explicit, fc, tm@(RVar x nm)) :: rest) =>
if nm == stop then pure (left,spine) else
case lookupMap' nm ops of
Just (MkOp name p fix False rule) => if p < prec
then pure (left, spine)
else
runRule p fix stop rule (RApp fc (RVar fc name) left Explicit) rest
Just _ => fail "expected operator"
Nothing =>
if isPrefixOf "." nm
then pratt ops prec stop (RApp (getFC tm) tm left Explicit) rest
else pratt ops prec stop (RApp (getFC left) left tm Explicit) rest
((icit, fc, tm) :: rest) => pratt ops prec stop (RApp (getFC left) left tm icit) rest
where
projectHead : Raw -> AppSpine -> (Raw × AppSpine)
projectHead t sp@((Explicit, fc', RVar fc nm) :: rest) =
if isPrefixOf "." nm
then projectHead (RApp fc (RVar fc nm) t Explicit) rest
else (t,sp)
projectHead t sp = (t, sp)
-- we need to check left/AppSpine first
-- we have a case above for when the next token is a projection, but
-- we need this to make projection bind tighter than app
runProject : AppSpine -> AppSpine
runProject (t@(Explicit, fc', tm) :: u@(Explicit, _, RVar fc nm) :: rest) =
if isPrefixOf "." nm
then runProject ((Explicit, fc', RApp fc (RVar fc nm) tm Explicit) :: rest)
else (t :: u :: rest)
runProject tms = tms
-- left has our partially applied operator and we're picking up args
-- for the rest of the `_`
runRule : Int -> Fixity -> String -> List String -> Raw -> AppSpine -> Parser (Raw × AppSpine)
runRule p fix stop Nil left spine = pure (left, spine)
runRule p fix stop ("" :: Nil) left spine = do
let pr = case fix of
InfixR => p
_ => p + 1
case spine of
((_, fc, right) :: rest) => do
(right, rest) <- pratt ops pr stop right rest
pratt ops prec stop (RApp (getFC left) left right Explicit) rest
_ => fail "trailing operator"
runRule p fix stop (nm :: rule) left spine = do
case spine of
Nil => fail "short"
((_, _, right) :: rest) => do
(right,rest) <- pratt ops 0 nm right rest -- stop!!
let ((_,fc',RVar fc name) :: rest) = rest
| _ => fail "expected \{nm}"
if name == nm
then runRule p fix stop rule (RApp (getFC left) left right Explicit) rest
else fail "expected \{nm}"
-- run any prefix operators
runPrefix : String -> Raw -> AppSpine -> Parser (Raw × AppSpine)
runPrefix stop (RVar fc nm) spine =
case lookupMap' nm ops of
-- TODO False should be an error here
Just (MkOp name p fix True rule) => do
runRule p fix stop rule (RVar fc name) spine
_ =>
pure (left, spine)
runPrefix stop left spine = pure (left, spine)
parseOp : Parser Raw
parseOp = do
fc <- getPos
ops <- getOps
hd <- atom
rest <- many pArg
(res, Nil) <- pratt ops 0 "" hd rest
| _ => fail "extra stuff"
pure res
-- TODO case let? We see to only have it for `do`
-- try (keyword "let" >> symbol "(")
letExpr : Parser Raw
letExpr = do
keyword "let"
alts <- startBlock $ someSame $ letAssign
keyword' "in"
scope <- typeExpr
pure $ foldl mkLet scope (reverse alts)
where
mkLet : Raw String × FC × Maybe Raw × Raw Raw
mkLet acc (n,fc,ty,v) = RLet fc n (fromMaybe (RImplicit fc) ty) v acc
letAssign : Parser (Name × FC × Maybe Raw × Raw)
letAssign = do
fc <- getPos
name <- ident
-- TODO type assertion
ty <- optional (keyword ":" *> typeExpr)
keyword "="
t <- typeExpr
pure (name,fc,ty,t)
pLamArg : Parser (Icit × String × Maybe Raw)
pLamArg = impArg <|> autoArg <|> expArg
<|> (\ x => (Explicit, x, Nothing)) <$> (ident <|> uident)
<|> keyword "_" *> pure (Explicit, "_", Nothing)
where
impArg : Parser (Icit × String × Maybe Raw)
impArg = do
nm <- braces (ident <|> uident)
ty <- optional (symbol ":" >> typeExpr)
pure (Implicit, nm, ty)
autoArg : Parser (Icit × String × Maybe Raw)
autoArg = do
nm <- dbraces (ident <|> uident)
ty <- optional (symbol ":" >> typeExpr)
pure (Auto, nm, ty)
expArg : Parser (Icit × String × Maybe Raw)
expArg = do
nm <- parenWrap (ident <|> uident)
ty <- optional (symbol ":" >> typeExpr)
pure (Explicit, nm, ty)
lamExpr : Parser Raw
lamExpr = do
pos <- getPos
keyword "\\" <|> keyword "λ"
args <- some $ addPos pLamArg
keyword "=>"
scope <- typeExpr
pure $ foldr mkLam scope args
where
mkLam : FC × Icit × Name × Maybe Raw Raw Raw
mkLam (fc, icit, name, ty) sc = RLam fc (BI fc name icit Many) sc
caseAlt : Parser RCaseAlt
caseAlt = do
pure MkUnit
pat <- typeExpr
keyword "=>"
t <- term
pure $ MkAlt pat t
caseExpr : Parser Raw
caseExpr = do
fc <- getPos
keyword "case"
sc <- term
keyword "of"
alts <- startBlock $ someSame $ caseAlt
pure $ RCase fc sc alts
caseLamExpr : Parser Raw
caseLamExpr = do
fc <- getPos
try ((keyword "\\" <|> keyword "λ") *> keyword "case")
alts <- startBlock $ someSame $ caseAlt
pure $ RLam fc (BI fc "$case" Explicit Many) $ RCase fc (RVar fc "$case") alts
doExpr : Parser Raw
doStmt : Parser DoStmt
caseLet : Parser Raw
caseLet = do
-- look ahead so we can fall back to normal let
fc <- getPos
try (keyword "let" >> symbol "(")
pat <- typeExpr
symbol ")"
keyword "="
sc <- typeExpr
alts <- startBlock $ manySame $ symbol "|" *> caseAlt
keyword "in"
body <- term
pure $ RCase fc sc (MkAlt pat body :: alts)
doCaseLet : Parser DoStmt
doCaseLet = do
-- look ahead so we can fall back to normal let
-- Maybe make it work like arrow?
fc <- getPos
try (keyword "let" >> symbol "(")
pat <- typeExpr
symbol ")"
keyword "="
sc <- typeExpr
alts <- startBlock $ manySame $ symbol "|" *> caseAlt
bodyFC <- getPos
body <- RDo <$> getPos <*> someSame doStmt
pure $ DoExpr fc (RCase fc sc (MkAlt pat body :: alts))
doArrow : Parser DoStmt
doArrow = do
fc <- getPos
left <- typeExpr
(Just _) <- optional $ keyword "<-"
| _ => pure $ DoExpr fc left
right <- term
alts <- startBlock $ manySame $ symbol "|" *> caseAlt
pure $ DoArrow fc left right alts
doLet : Parser DoStmt
doLet = do
fc <- getPos
keyword "let"
nm <- ident
keyword "="
tm <- term
pure $ DoLet fc nm tm
doStmt
= doCaseLet
<|> doLet
<|> doArrow
doExpr = RDo <$> getPos <* keyword "do" <*> (startBlock $ someSame doStmt)
parseIfThen : Parser Raw
parseIfThen = do
fc <- getPos
keyword "if"
a <- term
keyword "then"
b <- term
keyword "else"
c <- term
pure $ RIf fc a b c
term' : Parser Raw
term' = caseExpr
<|> caseLet
<|> letExpr
<|> caseLamExpr
<|> lamExpr
<|> doExpr
<|> parseIfThen
-- Make this last for better error messages
<|> parseOp
term = do
t <- term'
rest <- many (_,_ <$> getPos <* keyword "$" <*> term')
pure $ apply t rest
where
apply : Raw -> List (FC × Raw) -> Raw
apply t Nil = t
apply t ((fc,x) :: xs) = RApp fc t (apply x xs) Explicit
varname : Parser String
varname = (ident <|> uident <|> keyword "_" *> pure "_")
quantity : Parser Quant
quantity = fromMaybe Many <$> optional (Zero <$ keyword "0")
ebind : Parser Telescope
ebind = do
-- don't commit until we see the ":"
symbol "("
quant <- quantity
names <- try (some (addPos varname) <* symbol ":")
ty <- typeExpr
symbol ")"
pure $ map (makeBind quant ty) names
where
makeBind : Quant Raw FC × String (BindInfo × Raw)
makeBind quant ty (pos, name) = (BI pos name Explicit quant, ty)
ibind : Parser Telescope
ibind = do
-- I've gone back and forth on this, but I think {m a b} is more useful than {Int}
symbol "{"
quant <- quantity
names <- (some (addPos varname))
ty <- optional (symbol ":" *> typeExpr)
symbol "}"
pure $ map (makeBind quant ty) names
where
makeBind : Quant Maybe Raw FC × String BindInfo × Raw
makeBind quant ty (pos, name) = (BI pos name Implicit quant, fromMaybe (RImplicit pos) ty)
abind : Parser Telescope
abind = do
-- for this, however, it would be nice to allow {{Monad A}}
symbol "{{"
name <- optional $ try (addPos varname <* symbol ":")
ty <- typeExpr
symbol "}}"
case name of
Just (pos,name) => pure ((BI pos name Auto Many, ty) :: Nil)
Nothing => pure ((BI (getFC ty) "_" Auto Many, ty) :: Nil)
arrow : Parser Unit
arrow = symbol "->" <|> symbol ""
-- Collect a bunch of binders (A : U) {y : A} -> ...
forAll : Parser Raw
forAll = do
keyword "forall" <|> keyword ""
all <- some (addPos varname)
keyword "."
scope <- typeExpr
pure $ foldr mkPi scope all
where
mkPi : FC × String Raw Raw
mkPi (fc, n) sc = RPi fc (BI fc n Implicit Zero) (RImplicit fc) sc
binders : Parser Raw
binders = do
binds <- many (abind <|> ibind <|> ebind)
arrow
scope <- typeExpr
pure $ foldr mkBind scope (join binds)
where
mkBind : (BindInfo × Raw) -> Raw -> Raw
mkBind (info, ty) scope = RPi (getFC info) info ty scope
typeExpr
= binders
<|> forAll
<|> (do
fc <- getPos
exp <- term
scope <- optional (arrow *> typeExpr)
case scope of
Nothing => pure exp
-- consider Maybe String to represent missing
(Just scope) => pure $ RPi fc (BI fc "_" Explicit Many) exp scope)
-- And top level stuff
parseSig : Parser Decl
parseSig = TypeSig <$> getPos <*> try (some (ident <|> uident <|> token Projection) <* keyword ":") <*> typeExpr
parseImport : Parser Import
parseImport = do
fc <- getPos
keyword "import"
ident <- uident
rest <- many $ token Projection
let name = joinBy "" (ident :: rest)
pure $ MkImport fc name
-- Do we do pattern stuff now? or just name = lambda?
-- TODO multiple names
parseMixfix : Parser Decl
parseMixfix = do
fc <- getPos
fix <- InfixL <$ keyword "infixl"
<|> InfixR <$ keyword "infixr"
<|> Infix <$ keyword "infix"
prec <- token Number
ops <- some $ token MixFix
for ops $ \ op => addOp op (cast prec) fix
pure $ PMixFix fc ops (cast prec) fix
getName : Raw -> Parser String
getName (RVar x nm) = pure nm
getName (RApp x t u icit) = getName t
getName tm = fail "bad LHS"
parseDef : Parser Decl
parseDef = do
fc <- getPos
t <- typeExpr
nm <- getName t
keyword "="
body <- typeExpr
wfc <- getPos
w <- optional $ do
keyword "where"
startBlock $ manySame $ (parseSig <|> parseDef)
let body = maybe body (\ decls => RWhere wfc decls body) w
-- these get collected later
pure $ Def fc nm ((t, body) :: Nil) -- (MkClause fc Nil t body :: Nil)
parsePType : Parser Decl
parsePType = do
fc <- getPos
keyword "ptype"
id <- uident
ty <- optional $ do
keyword ":"
typeExpr
pure $ PType fc id ty
parsePFunc : Parser Decl
parsePFunc = do
fc <- getPos
keyword "pfunc"
nm <- ident
used <- optional (keyword "uses" >> parenWrap (many $ uident <|> ident <|> token MixFix))
keyword ":"
ty <- typeExpr
keyword ":="
src <- token JSLit
pure $ PFunc fc nm (fromMaybe Nil used) ty src
parseShortData : Parser Decl
parseShortData = do
fc <- getPos
keyword "data"
lhs <- typeExpr
keyword "="
sigs <- sepBy (keyword "|") typeExpr
pure $ ShortData fc lhs sigs
parseData : Parser Decl
parseData = do
fc <- getPos
-- commit when we hit ":"
name <- try $ (keyword "data" *> (uident <|> ident <|> token MixFix) <* keyword ":")
ty <- typeExpr
keyword "where"
decls <- startBlock $ manySame $ parseSig
pure $ Data fc name ty decls
nakedBind : Parser Telescope
nakedBind = do
names <- some (addPos varname)
pure $ map makeBind names
where
makeBind : FC × String (BindInfo × Raw)
makeBind (pos, name) = (BI pos name Explicit Many, RImplicit pos)
parseRecord : Parser Decl
parseRecord = do
fc <- getPos
keyword "record"
name <- uident
teles <- many $ ebind <|> nakedBind
keyword "where"
cname <- optional $ keyword "constructor" *> (uident <|> token MixFix)
decls <- startBlock $ manySame $ parseSig
pure $ Record fc name (join teles) cname decls
parseClass : Parser Decl
parseClass = do
fc <- getPos
keyword "class"
name <- uident
teles <- many $ ebind <|> nakedBind
keyword "where"
decls <- startBlock $ manySame $ parseSig
pure $ Class fc name (join teles) decls
parseInstance : Parser Decl
parseInstance = do
fc <- getPos
keyword "instance"
ty <- typeExpr
-- is it a forward declaration
(Just _) <- optional $ keyword "where"
| _ => pure $ Instance fc ty Nothing
decls <- startBlock $ manySame $ parseDef
pure $ Instance fc ty (Just decls)
-- Not sure what I want here.
-- I can't get a Tm without a type, and then we're covered by the other stuff
parseNorm : Parser Decl
parseNorm = DCheck <$> getPos <* keyword "#check" <*> typeExpr <* keyword ":" <*> typeExpr
parseDecl : Parser Decl
parseDecl = parseMixfix <|> parsePType <|> parsePFunc
<|> parseNorm <|> parseData <|> parseShortData <|> parseSig <|> parseDef
<|> parseClass <|> parseInstance <|> parseRecord
parseModHeader : Parser (FC × String)
parseModHeader = do
sameLevel (keyword "module")
fc <- getPos
name <- uident
rest <- many $ token Projection
-- FIXME use QName
let name = joinBy "" (name :: rest)
pure (fc, name)
parseImports : Parser (List Import)
parseImports = manySame parseImport
parseMod : Parser Module
parseMod = do
sameLevel (keyword "module")
name <- uident
rest <- many $ token Projection
imports <- manySame parseImport
decls <- manySame parseDecl
let name = joinBy "" (name :: rest)
pure $ MkModule name imports decls
-- data ReplCmd =
-- Def Decl
-- | Norm Raw -- or just name?
-- | Check Raw
-- -- Eventually I'd like immediate actions in the file, like lean, but I
-- -- also want to REPL to work and we can do that first.
-- parseRepl : Parser ReplCmd
-- parseRepl = Def <$> parseDecl <|> Norm <$ keyword "#nf" <*> typeExpr
-- <|> Check <$ keyword "#check" <*> typeExpr