Parsing updates for unicode
- Allow unicode characters in indents and operators - Show lexing errors
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,3 +7,4 @@ build/
|
||||
*.agdai
|
||||
*.js
|
||||
input.txt
|
||||
node_modules
|
||||
|
||||
22
TODO.md
22
TODO.md
@@ -3,15 +3,24 @@
|
||||
|
||||
- [ ] Allow unicode operators/names
|
||||
- refactored parser to prep for this
|
||||
- [ ] get rid of stray INFO from auto resolution
|
||||
- [ ] handle if_then_else_j
|
||||
- [ ] Web tool
|
||||
- edit, view output, view js, run js, monaco would be nice.
|
||||
- need to shim out Buffer
|
||||
- [x] get rid of stray INFO from auto resolution
|
||||
- [ ] handle if_then_else_ style mixfix
|
||||
- [ ] Search should look at context
|
||||
- [ ] records
|
||||
- [ ] copattern matching
|
||||
- [ ] Support @ on the LHS
|
||||
- [x] Remember operators from imports
|
||||
- [ ] Default cases for non-primitives (currently gets expanded to all constructors)
|
||||
- This may need a little care. But I think I could collect all constructors that only match wildcards into a single case. This would lose any information from the individual, unnamed cases though.
|
||||
- There are cases where we have `_` and then `Foo` on the next line, but they should all get collected into the `Foo` case. I think I sorted all of this out for primitives.
|
||||
- [x] Case for primitives
|
||||
- [ ] aoc2023 translation
|
||||
- [x] day1
|
||||
- [x] day2
|
||||
- some "real world" examples -v
|
||||
- some "real world" examples
|
||||
- [x] Maybe Eq and stuff would work for typeclass without dealing with unification issues yet
|
||||
- [x] unsolved meta errors repeat (need to freeze or only report at end)
|
||||
- [x] Sanitize JS idents, e.g. `_+_`
|
||||
@@ -22,7 +31,6 @@
|
||||
leave that implicit for efficiency. I think it would also make printing more readable.
|
||||
- When printing `Value`, I now print the spine size instead of spine.
|
||||
- [x] eval for case (see order.newt)
|
||||
- [ ] dynamic pattern unification (add test case first)
|
||||
- [x] switch from commit/mustWork to checking progress
|
||||
- [x] type constructors are no longer generated? And seem to have 0 arity.
|
||||
- [x] raw let is not yet implemented (although define used by case tree building)
|
||||
@@ -60,17 +68,19 @@
|
||||
- [ ] magic nat (codegen as number with appropriate pattern matching)
|
||||
- [ ] magic tuple? (codegen as array)
|
||||
- [ ] magic newtype? (drop them in codegen)
|
||||
- [ ] records / copatterns
|
||||
- [x] vscode: syntax highlighting for String
|
||||
- [ ] add `pop` or variant of `pfunc` that maps to an operator, giving the js operator and precedence on RHS
|
||||
|
||||
### Parsing
|
||||
|
||||
- [ ] consider allowing σ etc in identifiers
|
||||
- Probably need to merge oper / ident first and sort out mixfix in parsing.
|
||||
- Probably need to merge oper / ident first and sort out mixfix in parsing
|
||||
- The mixfix parsing can handle this now, need to update lexing.
|
||||
- [ ] Parse error not ideal for `\x y z b=> b` (points to lambda)
|
||||
|
||||
### Background
|
||||
|
||||
- [ ] Read Ulf Norell thesis
|
||||
- [ ] Finish reading dynamic pattern unification paper to see what is missing/wrong with the current implementation
|
||||
|
||||
|
||||
|
||||
40
newt/Combinatory.newt
Normal file
40
newt/Combinatory.newt
Normal file
@@ -0,0 +1,40 @@
|
||||
module Combinatory
|
||||
|
||||
data Unit : U where
|
||||
MkUnit : Unit
|
||||
|
||||
infixr 7 _::_
|
||||
data List : U -> U where
|
||||
Nil : {A : U} -> List A
|
||||
_::_ : {A : U} -> A -> List A -> List A
|
||||
|
||||
-- prj/menagerie/papers/combinatory
|
||||
|
||||
infixr 6 _~>_
|
||||
data Type : U where
|
||||
ι : Type
|
||||
_~>_ : Type -> Type -> Type
|
||||
|
||||
A : U
|
||||
A = Unit
|
||||
|
||||
Val : Type -> U
|
||||
Val ι = A
|
||||
Val (x ~> y) = Val x -> Val y
|
||||
|
||||
Ctx : U
|
||||
Ctx = List Type
|
||||
|
||||
data Ref : Type -> Ctx -> U where
|
||||
Z : {σ : Type} {Γ : Ctx} -> Ref σ (σ :: Γ)
|
||||
S : {σ τ : Type} {Γ : Ctx} -> Ref σ Γ -> Ref σ (τ :: Γ)
|
||||
|
||||
data Term : Ctx -> Type -> U where
|
||||
App : {Γ : Ctx} {σ τ : Type} -> Term Γ (σ ~> τ) -> Term Γ σ -> Term Γ τ
|
||||
Lam : {Γ : Ctx} {σ τ : Type} -> Term (σ :: Γ) τ -> Term Γ (σ ~> τ)
|
||||
Var : {Γ : Ctx} {σ : Type} -> Ref σ Γ → Term Γ σ
|
||||
|
||||
-- FIXME, I'm not getting an error for Nil, but it's shadowing Nil
|
||||
data Env : Ctx -> U where
|
||||
ENil : Env Nil
|
||||
ECons : {Γ : Ctx} {σ : Type} → Val σ → Env Γ → Env (σ :: Γ)
|
||||
@@ -17,6 +17,7 @@ data Either : U -> U -> U where
|
||||
|
||||
infixr 0 _$_
|
||||
|
||||
-- Currently very noisy in generated code
|
||||
_$_ : {a b : U} -> (a -> b) -> a -> b
|
||||
f $ a = f a
|
||||
|
||||
|
||||
52
src/Lib/Common.idr
Normal file
52
src/Lib/Common.idr
Normal file
@@ -0,0 +1,52 @@
|
||||
module Lib.Common
|
||||
|
||||
import Data.String
|
||||
|
||||
-- I was going to use a record, but we're peeling this off of bounds at the moment.
|
||||
public export
|
||||
FC : Type
|
||||
FC = (Int,Int)
|
||||
|
||||
public export
|
||||
interface HasFC a where
|
||||
getFC : a -> FC
|
||||
|
||||
%name FC fc
|
||||
|
||||
export
|
||||
emptyFC : FC
|
||||
emptyFC = (0,0)
|
||||
|
||||
-- Error of a parse
|
||||
public export
|
||||
data Error = E FC String
|
||||
%name Error err
|
||||
|
||||
public export
|
||||
showError : String -> Error -> String
|
||||
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
|
||||
where
|
||||
go : Int -> List String -> String
|
||||
go l [] = ""
|
||||
go l (x :: xs) =
|
||||
if l == line then
|
||||
" \{x}\n \{replicate (cast col) ' '}^\n"
|
||||
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
|
||||
else go (l + 1) xs
|
||||
|
||||
public export
|
||||
data Fixity = InfixL | InfixR | Infix
|
||||
|
||||
export
|
||||
Show Fixity where
|
||||
show InfixL = "infixl"
|
||||
show InfixR = "infixr"
|
||||
show Infix = "infix"
|
||||
|
||||
public export
|
||||
record OpDef where
|
||||
constructor MkOp
|
||||
name : String
|
||||
prec : Int
|
||||
fix : Fixity
|
||||
|
||||
@@ -95,7 +95,6 @@ pArg = do
|
||||
(Explicit,fc,) <$> atom
|
||||
<|> (Implicit,fc,) <$> braces typeExpr
|
||||
<|> (Auto,fc,) <$> dbraces typeExpr
|
||||
<|> (Explicit,fc,) . RVar fc <$> token Oper
|
||||
|
||||
AppSpine = List (Icit,FC,Raw)
|
||||
|
||||
@@ -203,13 +202,6 @@ caseExpr = do
|
||||
alts <- startBlock $ someSame $ caseAlt
|
||||
pure $ RCase fc sc alts
|
||||
|
||||
doArrow : Parser DoStmt
|
||||
doArrow = do
|
||||
fc <- getPos
|
||||
name <- try $ ident <* keyword "<-"
|
||||
expr <- term
|
||||
pure $ DoArrow fc name expr
|
||||
|
||||
doStmt : Parser DoStmt
|
||||
doStmt
|
||||
= DoArrow <$> getPos <*> (try $ ident <* keyword "<-") <*> term
|
||||
@@ -232,9 +224,8 @@ varname = (ident <|> uident <|> keyword "_" *> pure "_")
|
||||
|
||||
ebind : Parser (List (FC, String, Icit, Raw))
|
||||
ebind = do
|
||||
sym "("
|
||||
names <- some $ withPos varname
|
||||
sym ":"
|
||||
-- don't commit until we see the ":"
|
||||
names <- try (sym "(" *> some (withPos varname) <* sym ":")
|
||||
ty <- typeExpr
|
||||
sym ")"
|
||||
pure $ map (\(pos, name) => (pos, name, Explicit, ty)) names
|
||||
@@ -262,7 +253,7 @@ arrow = sym "->" <|> sym "→"
|
||||
-- Collect a bunch of binders (A : U) {y : A} -> ...
|
||||
binders : Parser Raw
|
||||
binders = do
|
||||
binds <- many (abind <|> ibind <|> try ebind)
|
||||
binds <- many (abind <|> ibind <|> ebind)
|
||||
arrow
|
||||
scope <- typeExpr
|
||||
pure $ foldr (uncurry mkBind) scope (join binds)
|
||||
@@ -286,7 +277,7 @@ typeExpr = binders
|
||||
|
||||
export
|
||||
parseSig : Parser Decl
|
||||
parseSig = TypeSig <$> getPos <*> some (ident <|> uident) <* keyword ":" <*> typeExpr
|
||||
parseSig = TypeSig <$> getPos <*> try (some (ident <|> uident) <* keyword ":") <*> typeExpr
|
||||
|
||||
parseImport : Parser Import
|
||||
parseImport = MkImport <$> getPos <* keyword "import" <*> uident
|
||||
@@ -364,7 +355,7 @@ parseNorm = DCheck <$> getPos <* keyword "#check" <*> typeExpr <* keyword ":" <*
|
||||
|
||||
export
|
||||
parseDecl : Parser Decl
|
||||
parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> (try $ parseSig) <|> parseDef
|
||||
parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> parseSig <|> parseDef
|
||||
|
||||
|
||||
export
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
module Lib.Parser.Impl
|
||||
|
||||
import Lib.Token
|
||||
import Lib.Common
|
||||
import Data.String
|
||||
import Data.Nat
|
||||
|
||||
@@ -8,54 +9,6 @@ public export
|
||||
TokenList : Type
|
||||
TokenList = List BTok
|
||||
|
||||
public export
|
||||
data Fixity = InfixL | InfixR | Infix
|
||||
|
||||
export
|
||||
Show Fixity where
|
||||
show InfixL = "infixl"
|
||||
show InfixR = "infixr"
|
||||
show Infix = "infix"
|
||||
|
||||
-- I was going to use a record, but we're peeling this off of bounds at the moment.
|
||||
public export
|
||||
FC : Type
|
||||
FC = (Int,Int)
|
||||
|
||||
public export
|
||||
interface HasFC a where
|
||||
getFC : a -> FC
|
||||
|
||||
%name FC fc
|
||||
|
||||
export
|
||||
emptyFC : FC
|
||||
emptyFC = (0,0)
|
||||
|
||||
-- Error of a parse
|
||||
public export
|
||||
data Error = E FC String
|
||||
%name Error err
|
||||
|
||||
public export
|
||||
showError : String -> Error -> String
|
||||
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
|
||||
where
|
||||
go : Int -> List String -> String
|
||||
go l [] = ""
|
||||
go l (x :: xs) =
|
||||
if l == line then
|
||||
" \{x}\n \{replicate (cast col) ' '}^\n"
|
||||
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
|
||||
else go (l + 1) xs
|
||||
|
||||
public export
|
||||
record OpDef where
|
||||
constructor MkOp
|
||||
name : String
|
||||
prec : Int
|
||||
fix : Fixity
|
||||
|
||||
-- Result of a parse
|
||||
public export
|
||||
data Result : Type -> Type where
|
||||
|
||||
@@ -9,7 +9,6 @@ data Kind
|
||||
= Ident
|
||||
| UIdent
|
||||
| Keyword
|
||||
| Oper
|
||||
| MixFix
|
||||
| Number
|
||||
| Character
|
||||
@@ -29,7 +28,6 @@ Show Kind where
|
||||
show Ident = "Ident"
|
||||
show UIdent = "UIdent"
|
||||
show Keyword = "Keyword"
|
||||
show Oper = "Oper"
|
||||
show MixFix = "MixFix"
|
||||
show Number = "Number"
|
||||
show Character = "Character"
|
||||
@@ -47,7 +45,6 @@ Eq Kind where
|
||||
Ident == Ident = True
|
||||
UIdent == UIdent = True
|
||||
Keyword == Keyword = True
|
||||
Oper == Oper = True
|
||||
MixFix == MixFix = True
|
||||
Number == Number = True
|
||||
Character == Character = True
|
||||
|
||||
@@ -3,10 +3,12 @@ module Lib.Tokenizer
|
||||
import Text.Lexer
|
||||
import Text.Lexer.Tokenizer
|
||||
import Lib.Token
|
||||
import Lib.Common
|
||||
|
||||
keywords : List String
|
||||
keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
|
||||
"ptype", "pfunc", "module", "infixl", "infixr", "infix"]
|
||||
"ptype", "pfunc", "module", "infixl", "infixr", "infix",
|
||||
"->", "→", ":", "=>", ":=", "=", "<-", "\\", "_"]
|
||||
|
||||
specialOps : List String
|
||||
specialOps = ["->", ":", "=>", ":=", "=", "<-"]
|
||||
@@ -17,18 +19,12 @@ checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
|
||||
checkUKW : String -> Token Kind
|
||||
checkUKW s = if elem s keywords then Tok Keyword s else Tok UIdent s
|
||||
|
||||
checkOp : String -> Token Kind
|
||||
checkOp s = if elem s specialOps then Tok Keyword s else Tok Oper s
|
||||
|
||||
isOpChar : Char -> Bool
|
||||
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
|
||||
|
||||
opChar : Lexer
|
||||
opChar = pred isOpChar
|
||||
|
||||
identMore : Lexer
|
||||
identMore = alphaNum <|> exact "." <|> exact "'" <|> exact "_"
|
||||
|
||||
singleton : Lexer
|
||||
singleton = oneOf "()\\{}[],"
|
||||
|
||||
quo : Recognise True
|
||||
quo = is '"'
|
||||
|
||||
@@ -52,25 +48,34 @@ opMiddle = pred (\c => not (isSpace c || c == '_'))
|
||||
|
||||
rawTokens : Tokenizer (Token Kind)
|
||||
rawTokens
|
||||
= match (lower <+> many identMore) checkKW
|
||||
<|> match (upper <+> many identMore) checkUKW
|
||||
<|> match (some digit) (Tok Number)
|
||||
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
||||
<|> match charLit (Tok Character)
|
||||
= match spaces (Tok Space)
|
||||
-- { is singleton except for {{
|
||||
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
||||
-- need to make this an ident
|
||||
<|> match (exact ",") (checkKW)
|
||||
-- for now, our lambda slash is singleton
|
||||
<|> match (singleton) (Tok Symbol)
|
||||
-- TODO Drop MixFix token type when we support if_then_else_
|
||||
<|> match (exact "_" <+> (some opMiddle) <+> exact "_") (Tok MixFix)
|
||||
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
||||
-- REVIEW - expect non-alpha after?
|
||||
<|> match (some digit) (Tok Number)
|
||||
-- for module names and maybe type constructors
|
||||
<|> match (charLit) (Tok Character)
|
||||
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
||||
<|> match (lineComment (exact "--")) (Tok Space)
|
||||
<|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
|
||||
<|> match (exact ",") (Tok Oper)
|
||||
<|> match (some opChar) checkOp
|
||||
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
||||
<|> match symbol (Tok Symbol)
|
||||
<|> match spaces (Tok Space)
|
||||
<|> match (upper <+> many identMore) checkUKW
|
||||
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
||||
-- accept almost everything, but
|
||||
<|> match (some (non (space <|> singleton))) checkKW
|
||||
|
||||
notSpace : WithBounds (Token Kind) -> Bool
|
||||
notSpace (MkBounded (Tok Space _) _ _) = False
|
||||
notSpace _ = True
|
||||
|
||||
export
|
||||
tokenise : String -> List BTok
|
||||
tokenise = filter notSpace . fst . lex rawTokens
|
||||
tokenise : String -> Either Error (List BTok)
|
||||
tokenise s = case lex rawTokens s of
|
||||
(toks, EndInput, l, c, what) => Right (filter notSpace toks)
|
||||
(toks, reason, l, c, what) => Left (E (l,c) "\{show reason}")
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module Lib.Types
|
||||
|
||||
-- For FC, Error
|
||||
import public Lib.Parser.Impl
|
||||
import public Lib.Common
|
||||
import Lib.Prettier
|
||||
|
||||
import public Control.Monad.Error.Either
|
||||
@@ -433,7 +433,7 @@ names ctx = toList $ map fst ctx.types
|
||||
|
||||
public export
|
||||
M : Type -> Type
|
||||
M = (StateT TopContext (EitherT Impl.Error IO))
|
||||
M = (StateT TopContext (EitherT Error IO))
|
||||
|
||||
||| Force argument and print if verbose is true
|
||||
export
|
||||
|
||||
@@ -11,7 +11,7 @@ import Data.IORef
|
||||
-- import Lib.Elab
|
||||
import Lib.Compile
|
||||
import Lib.Parser
|
||||
-- import Lib.Parser.Impl
|
||||
import Lib.Parser.Impl
|
||||
import Lib.Prettier
|
||||
import Lib.ProcessDecl
|
||||
import Lib.Token
|
||||
@@ -60,7 +60,8 @@ processModule base stk name = do
|
||||
let fn = base ++ "/" ++ name ++ ".newt"
|
||||
Right src <- readFile $ fn
|
||||
| Left err => fail (show err)
|
||||
let toks = tokenise src
|
||||
let Right toks = tokenise src
|
||||
| Left err => fail (showError src err)
|
||||
|
||||
let Right (modName, ops, toks) := partialParse parseModHeader top.ops toks
|
||||
| Left err => fail (showError src err)
|
||||
|
||||
Reference in New Issue
Block a user