From ed3ee96df93f1cd95f11d968ad69535858ea8a5c Mon Sep 17 00:00:00 2001 From: Steve Dunham Date: Sat, 20 May 2023 22:54:01 -0700 Subject: [PATCH] parser good enough to elab kovacs stuff --- eg/ex.newt | 13 ++-- src/Lib/Parser.idr | 132 ++++++++++++++++++++++++++++------------ src/Lib/Parser/Impl.idr | 54 +++++++++------- src/Syntax.idr | 12 ++-- 4 files changed, 138 insertions(+), 73 deletions(-) diff --git a/eg/ex.newt b/eg/ex.newt index d3ed540..d905683 100644 --- a/eg/ex.newt +++ b/eg/ex.newt @@ -12,10 +12,13 @@ id : a -> a -- declaration id = \ a => a * a + 2 * (3 + x) --- this is complicated with patterns because we need to group stuff together. --- I really should make a simple grammar - --- I want to put this on ice, there is so much to do before patterns.. - blah : Either a a -> a blah = \ x => let x = 1 in x * x + +bar = foo {x} 1 +blah = \ _ => 1 + + +next : (A : Type) -> (x : A) -> A + +next : {A : Type} -> (x : A) -> A diff --git a/src/Lib/Parser.idr b/src/Lib/Parser.idr index 89627e6..48c4170 100644 --- a/src/Lib/Parser.idr +++ b/src/Lib/Parser.idr @@ -1,5 +1,12 @@ module Lib.Parser +-- NEXT - need to sort out parsing implicits +-- +-- app: foo {a} a b +-- lam: λ {A} {b : A} (c : Blah) d e f. something +-- pi: (A : Set) -> {b : A} -> (c : Foo b) -> c -> bar d + + import Lib.Token import Lib.Parser.Impl @@ -27,33 +34,40 @@ parens pa = do sym ")" pure t +braces : Parser a -> Parser a +braces pa = do + sym "{" + t <- pa + sym "}" + pure t + + +optional : Parser a -> Parser (Maybe a) +optional pa = Just <$> pa <|> pure Nothing + lit : Parser Raw lit = do t <- token Number pure $ RLit (LInt (cast t)) -export -term : (Parser Raw) +-- I can haz arrows +export typeExpr : Parser Raw +export term : (Parser Raw) withPos : Parser Raw -> Parser Raw withPos p = RSrcPos <$> getPos <*> p --- ( t : ty ), (t , u) (t) --- Or do we want (x : ty) I think we may need to annotate any Raw -parenThinger : Parser Raw -parenThinger = do - keyword "(" - t <- term - -- And now we want ) : or , - -- we could do this with backtracing, but it'd kinda suck? - fail "todo" - -- the inside of Raw atom : Parser Raw -atom = lit - <|> withPos (RVar <$> ident) +atom = withPos ( RVar <$> ident + <|> lit + <|> RU <$ keyword "U" + <|> RHole <$ keyword "_") <|> parens term - -- <|> sym "(" *> Raw <* sym ")" + +-- Argument to a Spine +pArg : Parser (Plicity,Raw) +pArg = (Explicit,) <$> atom <|> (Implicit,) <$> braces term -- -- atom is lit or ident @@ -63,8 +77,7 @@ data Fixity = InfixL | InfixR | Infix -- starter pack, but we'll move some to prelude operators : List (String, Int, Fixity) operators = [ - ("->", 1, InfixR), - ("=", 2, InfixL), -- REVIEW L R ? + ("=",2,Infix), ("+",4,InfixL), ("-",4,InfixL), ("*",5,InfixL), @@ -73,8 +86,8 @@ operators = [ parseApp : Parser Raw parseApp = do hd <- atom - rest <- many atom - pure $ foldl RApp hd rest + rest <- many pArg + pure $ foldl (\a, (c,b) => RApp a b c) hd rest parseOp : Lazy (Parser Raw) parseOp = parseApp >>= go 0 @@ -85,13 +98,10 @@ parseOp = parseApp >>= go 0 op <- token Oper let Just (p,fix) = lookup op operators | Nothing => fail "expected operator" - -- if p >= prec then pure () else fail "" - guard $ p >= prec - -- commit + if p >= prec then pure () else fail "" let pr = case fix of InfixR => p; _ => p + 1 - -- commit? right <- go pr !(parseApp) - go prec (RApp (RApp (RVar op) left) right) + go prec (RApp (RApp (RVar op) left Explicit) right Explicit) <|> pure left export @@ -103,29 +113,37 @@ letExpr = do keyword' "in" scope <- term pure $ RLet alts scope - -- Let <$ keyword "let" <*> ident <* keyword "=" <*> Raw <* keyword "in" <*> Raw where letAssign : Parser (Name,Raw) letAssign = do name <- ident + -- TODO type assertion keyword "=" t <- term pure (name,t) -pPattern : Parser Pattern -pPattern - = PatWild <$ keyword "_" - <|> PatVar <$> ident +pLetArg : Parser (Plicity, String, Maybe Raw) +pLetArg = (Implicit,,) <$> braces ident <*> optional (sym ":" >> typeExpr) + <|> (Explicit,,) <$> parens ident <*> optional (sym ":" >> typeExpr) + <|> (Explicit,,Nothing) <$> ident + <|> (Explicit,"_",Nothing) <$ keyword "_" +-- lam: λ {A} {b : A} (c : Blah) d e f. something export lamExpr : Parser Raw lamExpr = do keyword "\\" commit - name <- pPattern + (icit, name, ty) <- pLetArg keyword "=>" scope <- term - pure $ RLam name scope + -- TODO optional type + pure $ RLam name icit scope + +pPattern : Parser Pattern +pPattern + = PatWild <$ keyword "_" + <|> PatVar <$> ident caseAlt : Parser CaseAlt @@ -146,29 +164,65 @@ caseExpr = do alts <- startBlock $ someSame $ caseAlt pure $ RCase sc alts -term = defer $ \_ => - caseExpr +term = caseExpr <|> letExpr <|> lamExpr <|> parseOp +expBinder : Parser Raw +expBinder = do + sym "(" + name <- ident + sym ":" + ty <- typeExpr + sym ")" + sym "->" + scope <- typeExpr + pure $ RPi name Explicit ty scope + +impBinder : Parser Raw +impBinder = do + sym "{" + name <- ident + sym ":" + ty <- typeExpr + sym "}" + sym "->" + scope <- typeExpr + pure $ RPi name Implicit ty scope + +-- something binder looking +-- todo sepby space or whatever +export +binder : Parser Raw +binder = expBinder <|> impBinder + + +typeExpr = binder + <|> do + exp <- term + scope <- optional (sym "->" *> mustWork typeExpr) + case scope of + Nothing => pure exp + -- consider Maybe String to represent missing + (Just scope) => pure $ RPi "_" Explicit exp scope + + -- And top level stuff -optional : Parser a -> Parser (Maybe a) -optional pa = Just <$> pa <|> pure Nothing export parseSig : Parser Decl -parseSig = TypeSig <$> ident <* keyword ":" <*> term +parseSig = TypeSig <$> ident <* keyword ":" <*> mustWork typeExpr parseImport : Parser Decl -parseImport = DImport <$ keyword "import" <*> ident +parseImport = DImport <$ keyword "import" <* commit <*> ident -- Do we do pattern stuff now? or just name = lambda? export parseDef : Parser Decl -parseDef = Def <$> ident <* keyword "=" <*> term +parseDef = Def <$> ident <* keyword "=" <*> mustWork typeExpr export parseData : Parser Decl @@ -176,7 +230,7 @@ parseData = do keyword "data" name <- ident keyword ":" - ty <- term + ty <- typeExpr keyword "where" decls <- startBlock $ someSame $ parseSig -- TODO - turn decls into something more useful diff --git a/src/Lib/Parser/Impl.idr b/src/Lib/Parser/Impl.idr index 18a40db..f37b29e 100644 --- a/src/Lib/Parser/Impl.idr +++ b/src/Lib/Parser/Impl.idr @@ -23,12 +23,12 @@ data Error = E String public export data Result : Type -> Type where OK : a -> (toks : TokenList) -> (com : Bool) -> Result a - Fail : Error -> (toks : TokenList) -> (com : Bool) -> Result a + Fail : Bool -> Error -> (toks : TokenList) -> (com : Bool) -> Result a export Functor Result where map f (OK a toks com ) = OK (f a) toks com - map _ (Fail err toks com) = Fail err toks com + map _ (Fail fatal err toks com) = Fail fatal err toks com -- So sixty just has a newtype function here now (probably for perf). -- A record might be more ergonomic, but would require a record impl before @@ -48,7 +48,7 @@ runP (P f) = f export parse : Parser a -> TokenList -> Either String a parse pa toks = case runP pa toks False emptyPos of - Fail (E msg) toks com => Left "error: \{msg} next at: \{show toks}" + Fail fatal (E msg) toks com => Left "error: \{msg} next at: \{show toks}" OK a [] _ => Right a OK a ts _ => Left "Extra toks \{show ts}" @@ -56,7 +56,18 @@ parse pa toks = case runP pa toks False emptyPos of export fail : String -> Parser a -fail msg = P $ \toks,com,col => Fail (E msg) toks com +fail msg = P $ \toks,com,col => Fail False (E msg) toks com + +export +fatal : String -> Parser a +fatal msg = P $ \toks,com,col => Fail False (E msg) toks com + +-- mustWork / commit copied from Idris, but I may switch to the parsec consumption thing. +export +mustWork : Parser a -> Parser a +mustWork (P pa) = P $ \ toks, com, col => case (pa toks com col) of + Fail x err xs y => Fail True err xs y + res => res export Functor Parser where @@ -67,36 +78,36 @@ Applicative Parser where pure pa = P (\ toks, com, col => OK pa toks com) P pab <*> P pa = P $ \toks,com,col => case pab toks com col of - Fail err toks com => Fail err toks com + Fail fatal err toks com => Fail fatal err toks com OK f toks com => case pa toks com col of (OK x toks com) => OK (f x) toks com - (Fail err toks com) => Fail err toks com + (Fail fatal err toks com) => Fail fatal err toks com -- REVIEW it would be nice if the second argument was lazy... export -Alternative Parser where - empty = fail "empty" - (P pa) <|> (P pb) = P $ \toks,com,col => - case pa toks False col of - OK a toks' _ => OK a toks' com - Fail err toks' True => Fail err toks' com - Fail err toks' False => pb toks com col +(<|>) : Parser a -> Lazy (Parser a) -> Parser a +(P pa) <|> (P pb) = P $ \toks,com,col => + case pa toks False col of + OK a toks' _ => OK a toks' com + Fail True err toks' com => Fail True err toks' com + Fail fatal err toks' True => Fail fatal err toks' com + Fail fatal err toks' False => pb toks com col export Monad Parser where P pa >>= pab = P $ \toks,com,col => case pa toks com col of (OK a toks com) => runP (pab a) toks com col - (Fail err xs x) => Fail err xs x + (Fail fatal err xs x) => Fail fatal err xs x -- do we need this? pred : (BTok -> Bool) -> String -> Parser String pred f msg = P $ \toks,com,col => case toks of - (t :: ts) => if f t then OK (value t) ts com else Fail (E "\{msg} vt:\{value t}") toks com - [] => Fail (E "eof") toks com + (t :: ts) => if f t then OK (value t) ts com else Fail False (E "\{msg} vt:\{value t}") toks com + [] => Fail False (E "eof") toks com export commit : Parser () @@ -106,9 +117,7 @@ export defer : (() -> (Parser a)) -> Parser a defer f = P $ \toks,com,col => runP (f ()) toks com col - mutual - export some : Parser a -> Parser (List a) some p = defer $ \_ => [| p :: many p|] --(::) <$> p <*> many p) @@ -124,7 +133,7 @@ mutual export getPos : Parser SourcePos getPos = P $ \toks,com, (l,c) => case toks of - [] => Fail (E "End of file") toks com -- OK emptyPos toks com + [] => Fail False (E "End of file") toks com -- OK emptyPos toks com (t :: ts) => OK (start t) toks com ||| Start an indented block and run parser in it @@ -145,8 +154,8 @@ sameLevel (P p) = P $ \toks,com,(l,c) => case toks of (t :: _) => let (tl,tc) = start t in if tc == c then p toks com (tl, c) - else if c < tc then Fail (E "unexpected indent") toks com - else Fail (E "unexpected indent") toks com + else if c < tc then Fail False (E "unexpected indent") toks com + else Fail False (E "unexpected indent") toks com export someSame : Parser a -> Parser (List a) @@ -160,13 +169,12 @@ indented (P p) = P $ \toks,com,(l,c) => case toks of (t::_) => let (tl,tc) = start t in if tc > c || tl == l then p toks com (l,c) - else Fail (E "unexpected outdent") toks com + else Fail False (E "unexpected outdent") toks com export token' : Kind -> Parser String token' k = pred (\t => t.val.kind == k) "Expected a \{show k} token" - export keyword' : String -> Parser () keyword' kw = ignore $ pred (\t => t.val.text == kw) "Expected \{kw}" diff --git a/src/Syntax.idr b/src/Syntax.idr index 82c9812..dc12026 100644 --- a/src/Syntax.idr +++ b/src/Syntax.idr @@ -32,8 +32,8 @@ data CaseAlt = MkAlt Pattern Raw public export data Raw = RVar Name - | RLam Pattern Raw - | RApp Raw Raw + | RLam String Plicity Raw + | RApp Raw Raw Plicity | RU | RPi Name Plicity Raw Raw | RLet (List (Name, Raw)) Raw @@ -42,7 +42,7 @@ data Raw | RAnn Raw Raw | RLit Literal | RCase Raw (List CaseAlt) - | RWildcard + | RHole | RParseError String -- derive some stuff - I'd like json, eq, show, ... @@ -122,14 +122,14 @@ Show Plicity where covering Show Raw where - show RWildcard = "Wildcard" + show RHole = "_" show (RVar name) = foo ["RVar", show name] show (RAnn t ty) = foo [ "RAnn", show t, show ty] show (RLit x) = foo [ "RLit", show x] show (RLet alts y) = foo [ "Let", show alts, show y] show (RPi str x y z) = foo [ "Pi", show str, show x, show y, show z] - show (RApp x y) = foo [ "App", show x, show y] - show (RLam x y) = foo [ "Lam", show x, show y] + show (RApp x y z) = foo [ "App", show x, show y, show z] + show (RLam x i y) = foo [ "Lam", show x, show i, show y] show (RCase x xs) = foo [ "Case", show x, show xs] show (RParseError str) = foo [ "ParseError", "str"] show RU = "U"