From 331d966ef3092d8fe49117ae89c7ea3ca5870ca6 Mon Sep 17 00:00:00 2001 From: Steve Dunham Date: Sat, 14 Sep 2024 14:46:04 -0700 Subject: [PATCH] drop commit/mustWork for parsec approach. allow mixfix constructors --- TODO.md | 3 +- newt/oper.newt | 6 +++- src/Lib/Parser.idr | 70 +++++++++++++++++------------------------ src/Lib/Parser/Impl.idr | 25 +++++---------- src/Lib/Tokenizer.idr | 7 +++-- 5 files changed, 49 insertions(+), 62 deletions(-) diff --git a/TODO.md b/TODO.md index 32dfb04..b94ac0b 100644 --- a/TODO.md +++ b/TODO.md @@ -3,7 +3,8 @@ I may be done with `U` - I keep typing `Type`. -- [ ] type constructors are no longer generated? And seem to have 0 arity. +- [x] switch from commit/mustWork to checking progress +- [x] type constructors are no longer generated? And seem to have 0 arity. - [ ] raw let is not yet implemented (although define used by case tree building) - [x] there is some zero argument application in generated code - [x] get equality.newt to work diff --git a/newt/oper.newt b/newt/oper.newt index 339aa6e..6e4164e 100644 --- a/newt/oper.newt +++ b/newt/oper.newt @@ -38,4 +38,8 @@ data Pair : U -> U -> U where _,_ : {A B : U} -> A -> B -> Pair A B blah : Int -> Int -> Int -> Pair Int (Pair Int Int) -blah x y z = (x , y, z) +blah x y z = (x, y, z) + +curryPlus : Pair Int Int -> Int +curryPlus (_,_ a b) = a + b +-- curryPlus (a , b) = a + b diff --git a/src/Lib/Parser.idr b/src/Lib/Parser.idr index c73f9b6..5c0cea1 100644 --- a/src/Lib/Parser.idr +++ b/src/Lib/Parser.idr @@ -94,9 +94,9 @@ parseOp = parseApp >>= go 0 where go : Int -> Raw -> Parser Raw go prec left = - do - fc <- getPos + try (do op <- token Oper + fc <- getPos ops <- getOps let op' = "_" ++ op ++ "_" let Just (p,fix) = lookup op' ops @@ -108,14 +108,13 @@ parseOp = parseApp >>= go 0 if p >= prec then pure () else fail "" let pr = case fix of InfixR => p; _ => p + 1 right <- go pr !(parseApp) - go prec (RApp fc (RApp fc (RVar fc op') left Explicit) right Explicit) + go prec (RApp fc (RApp fc (RVar fc op') left Explicit) right Explicit)) <|> pure left export letExpr : Parser Raw letExpr = do keyword "let" - commit alts <- startBlock $ someSame $ letAssign keyword' "in" scope <- typeExpr @@ -142,7 +141,6 @@ export lamExpr : Parser Raw lamExpr = do keyword "\\" <|> keyword "λ" - commit args <- some pLetArg keyword "=>" scope <- typeExpr @@ -163,19 +161,18 @@ patAtom = do fc <- getPos PatWild fc Explicit <$ keyword "_" <|> PatVar fc Explicit <$> ident - <|> PatCon fc Explicit <$> uident <*> pure [] + <|> PatCon fc Explicit <$> (uident <|> token MixFix) <*> pure [] <|> braces (PatVar fc Implicit <$> ident) <|> braces (PatWild fc Implicit <$ keyword "_") - <|> braces (PatCon fc Implicit <$> uident <*> many patAtom) + <|> braces (PatCon fc Implicit <$> (uident <|> token MixFix) <*> many patAtom) <|> parens pPattern -pPattern = PatCon (!getPos) Explicit <$> uident <*> many patAtom <|> patAtom +pPattern = PatCon (!getPos) Explicit <$> (uident <|> token MixFix) <*> many patAtom <|> patAtom caseAlt : Parser RCaseAlt caseAlt = do pat <- pPattern keyword "=>" - commit t <- term pure $ MkAlt pat t @@ -183,7 +180,6 @@ export caseExpr : Parser Raw caseExpr = do keyword "case" - commit sc <- term keyword "of" alts <- startBlock $ someSame $ caseAlt @@ -208,13 +204,11 @@ ebind = do ibind : Parser (List (String, Icit, Raw)) ibind = do sym "{" - mustWork $ do - names <- some (ident <|> uident) - ty <- optional (sym ":" >> typeExpr) - pos <- getPos - sym "}" - -- getPos is a hack here, I would like to position at the name... - pure $ map (\name => (name, Implicit, fromMaybe (RImplicit pos) ty)) names + names <- some (ident <|> uident) + ty <- optional (sym ":" >> typeExpr) + pos <- getPos + sym "}" + pure $ map (\name => (name, Implicit, fromMaybe (RImplicit pos) ty)) names arrow : Parser Unit arrow = sym "->" <|> sym "→" @@ -222,9 +216,8 @@ arrow = sym "->" <|> sym "→" -- Collect a bunch of binders (A : U) {y : A} -> ... binders : Parser Raw binders = do - binds <- many (ibind <|> ebind) + binds <- many (ibind <|> try ebind) arrow - commit scope <- typeExpr fc <- getPos pure $ foldr (mkBind fc) scope (join binds) @@ -235,7 +228,7 @@ binders = do typeExpr = binders <|> do exp <- term - scope <- optional (arrow *> mustWork typeExpr) + scope <- optional (arrow *> typeExpr) case scope of Nothing => pure exp -- consider Maybe String to represent missing @@ -247,10 +240,10 @@ typeExpr = binders export parseSig : Parser Decl -parseSig = TypeSig <$> getPos <*> (ident <|> uident) <* keyword ":" <*> mustWork typeExpr +parseSig = TypeSig <$> getPos <*> (ident <|> uident) <* keyword ":" <*> typeExpr parseImport : Parser Decl -parseImport = DImport <$> getPos <* keyword "import" <* commit <*> uident +parseImport = DImport <$> getPos <* keyword "import" <*> uident -- Do we do pattern stuff now? or just name = lambda? @@ -260,11 +253,10 @@ parseMixfix = do fix <- InfixL <$ keyword "infixl" <|> InfixR <$ keyword "infixr" <|> Infix <$ keyword "infix" - mustWork $ do - prec <- token Number - op <- token MixFix - addOp op (cast prec) fix - pure $ PMixFix fc op (cast prec) fix + prec <- token Number + op <- token MixFix + addOp op (cast prec) fix + pure $ PMixFix fc op (cast prec) fix export parseDef : Parser Decl @@ -273,7 +265,7 @@ parseDef = do nm <- ident <|> uident pats <- many patAtom keyword "=" - body <- mustWork typeExpr + body <- typeExpr -- these get collected later pure $ Def fc nm [MkClause fc [] pats body] @@ -285,7 +277,7 @@ parsePType = do id <- uident ty <- optional $ do keyword ":" - mustWork typeExpr + typeExpr pure $ PType fc id ty parsePFunc : Parser Decl @@ -296,7 +288,7 @@ parsePFunc = do keyword ":" ty <- typeExpr keyword ":=" - src <- mustWork (cast <$> token StringKind) + src <- cast <$> token StringKind pure $ PFunc fc nm ty src export @@ -304,16 +296,12 @@ parseData : Parser Decl parseData = do fc <- getPos keyword "data" - -- FIXME - switch from mustWork / commit to checking if we've consumed tokens - mustWork $ do - name <- uident - keyword ":" - ty <- typeExpr - keyword "where" - commit - decls <- startBlock $ manySame $ parseSig - -- TODO - turn decls into something more useful - pure $ Data fc name ty decls + name <- uident + keyword ":" + ty <- typeExpr + keyword "where" + decls <- startBlock $ manySame $ parseSig + pure $ Data fc name ty decls -- Not sure what I want here. -- I can't get a Tm without a type, and then we're covered by the other stuff @@ -322,7 +310,7 @@ parseNorm = DCheck <$> getPos <* keyword "#check" <*> typeExpr <* keyword ":" <* export parseDecl : Parser Decl -parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseImport <|> parseNorm <|> parseData <|> parseSig <|> parseDef +parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseImport <|> parseNorm <|> parseData <|> (try $ parseSig) <|> parseDef export parseMod : Parser Module diff --git a/src/Lib/Parser/Impl.idr b/src/Lib/Parser/Impl.idr index 44bce9a..1f2ae6c 100644 --- a/src/Lib/Parser/Impl.idr +++ b/src/Lib/Parser/Impl.idr @@ -1,13 +1,5 @@ module Lib.Parser.Impl --- This follows Idris, not sure why I did that because commit / mustWork is messy --- and painful to work with. I _think_ a commit on consumption of anything, like parsec --- would work better. - --- Perhaps we can set the commit flag on consumption and get that with minor changes. - --- TODO see what Kovacs' flatparse does for error handling / <|> - import Lib.Token import Data.String import Data.Nat @@ -101,6 +93,12 @@ parse pa toks = case runP pa toks False [] (-1,-1) of -- I think I want to drop the typeclasses for v1 +export +try : Parser a -> Parser a +try (P pa) = P $ \toks,com,ops,col => case pa toks com ops col of + (Fail x err toks com ops) => Fail x err toks False ops + res => res + export fail : String -> Parser a fail msg = P $ \toks,com,ops,col => Fail False (error toks msg) toks com ops @@ -109,13 +107,6 @@ export fatal : String -> Parser a fatal msg = P $ \toks,com,ops,col => Fail True (error toks msg) toks com ops --- mustWork / commit copied from Idris, but I may switch to the parsec consumption thing. -export -mustWork : Parser a -> Parser a -mustWork (P pa) = P $ \ toks, com, ops, col => case (pa toks com ops col) of - Fail x err xs y ops => Fail True err xs y ops - res => res - export getOps : Parser (List (String, Int, Fixity)) getOps = P $ \ toks, com, ops, col => OK ops toks com ops @@ -147,7 +138,7 @@ export case pa toks False ops col of OK a toks' _ ops => OK a toks' com ops Fail True err toks' com ops => Fail True err toks' com ops - Fail fatal err toks' True ops => Fail fatal err toks' com ops + Fail fatal err toks' True ops => Fail fatal err toks' True ops Fail fatal err toks' False ops => pb toks com ops col export @@ -161,7 +152,7 @@ Monad Parser where pred : (BTok -> Bool) -> String -> Parser String pred f msg = P $ \toks,com,ops,col => case toks of - (t :: ts) => if f t then OK (value t) ts com ops else Fail False (error toks "\{msg} at \{show $ kind t}:\{value t}") toks com ops + (t :: ts) => if f t then OK (value t) ts True ops else Fail False (error toks "\{msg} at \{show $ kind t}:\{value t}") toks com ops [] => Fail False (error toks "\{msg} at EOF") toks com ops export diff --git a/src/Lib/Tokenizer.idr b/src/Lib/Tokenizer.idr index 397ffc8..3d32ddc 100644 --- a/src/Lib/Tokenizer.idr +++ b/src/Lib/Tokenizer.idr @@ -17,6 +17,9 @@ checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s checkUKW : String -> Token Kind checkUKW s = if elem s keywords then Tok Keyword s else Tok UIdent s +checkOp : String -> Token Kind +checkOp s = if elem s specialOps then Tok Keyword s else Tok Oper s + isOpChar : Char -> Bool isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~") @@ -54,8 +57,8 @@ rawTokens <|> match (quo <+> manyUntil quo ((esc any <+> any) <|> any) <+> opt quo) (Tok StringKind . unquote) <|> match (lineComment (exact "--")) (Tok Space) <|> match (blockComment (exact "/-") (exact "-/")) (Tok Space) - <|> match (exact ",") (\s => Tok Oper s) - <|> match (some opChar) (\s => Tok Oper s) + <|> match (exact ",") (Tok Oper) + <|> match (some opChar) checkOp <|> match symbol (Tok Symbol) <|> match spaces (Tok Space)