Parsing updates for unicode

- Allow unicode characters in indents and operators
- Show lexing errors
This commit is contained in:
2024-11-02 10:22:04 -07:00
parent d41558c219
commit 6164893da5
13 changed files with 152 additions and 101 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ build/
*.agdai *.agdai
*.js *.js
input.txt input.txt
node_modules

22
TODO.md
View File

@@ -3,15 +3,24 @@
- [ ] Allow unicode operators/names - [ ] Allow unicode operators/names
- refactored parser to prep for this - refactored parser to prep for this
- [ ] get rid of stray INFO from auto resolution - [ ] Web tool
- [ ] handle if_then_else_j - edit, view output, view js, run js, monaco would be nice.
- need to shim out Buffer
- [x] get rid of stray INFO from auto resolution
- [ ] handle if_then_else_ style mixfix
- [ ] Search should look at context
- [ ] records
- [ ] copattern matching
- [ ] Support @ on the LHS
- [x] Remember operators from imports - [x] Remember operators from imports
- [ ] Default cases for non-primitives (currently gets expanded to all constructors) - [ ] Default cases for non-primitives (currently gets expanded to all constructors)
- This may need a little care. But I think I could collect all constructors that only match wildcards into a single case. This would lose any information from the individual, unnamed cases though.
- There are cases where we have `_` and then `Foo` on the next line, but they should all get collected into the `Foo` case. I think I sorted all of this out for primitives.
- [x] Case for primitives - [x] Case for primitives
- [ ] aoc2023 translation - [ ] aoc2023 translation
- [x] day1 - [x] day1
- [x] day2 - [x] day2
- some "real world" examples -v - some "real world" examples
- [x] Maybe Eq and stuff would work for typeclass without dealing with unification issues yet - [x] Maybe Eq and stuff would work for typeclass without dealing with unification issues yet
- [x] unsolved meta errors repeat (need to freeze or only report at end) - [x] unsolved meta errors repeat (need to freeze or only report at end)
- [x] Sanitize JS idents, e.g. `_+_` - [x] Sanitize JS idents, e.g. `_+_`
@@ -22,7 +31,6 @@
leave that implicit for efficiency. I think it would also make printing more readable. leave that implicit for efficiency. I think it would also make printing more readable.
- When printing `Value`, I now print the spine size instead of spine. - When printing `Value`, I now print the spine size instead of spine.
- [x] eval for case (see order.newt) - [x] eval for case (see order.newt)
- [ ] dynamic pattern unification (add test case first)
- [x] switch from commit/mustWork to checking progress - [x] switch from commit/mustWork to checking progress
- [x] type constructors are no longer generated? And seem to have 0 arity. - [x] type constructors are no longer generated? And seem to have 0 arity.
- [x] raw let is not yet implemented (although define used by case tree building) - [x] raw let is not yet implemented (although define used by case tree building)
@@ -60,17 +68,19 @@
- [ ] magic nat (codegen as number with appropriate pattern matching) - [ ] magic nat (codegen as number with appropriate pattern matching)
- [ ] magic tuple? (codegen as array) - [ ] magic tuple? (codegen as array)
- [ ] magic newtype? (drop them in codegen) - [ ] magic newtype? (drop them in codegen)
- [ ] records / copatterns
- [x] vscode: syntax highlighting for String - [x] vscode: syntax highlighting for String
- [ ] add `pop` or variant of `pfunc` that maps to an operator, giving the js operator and precedence on RHS - [ ] add `pop` or variant of `pfunc` that maps to an operator, giving the js operator and precedence on RHS
### Parsing ### Parsing
- [ ] consider allowing σ etc in identifiers - [ ] consider allowing σ etc in identifiers
- Probably need to merge oper / ident first and sort out mixfix in parsing. - Probably need to merge oper / ident first and sort out mixfix in parsing
- The mixfix parsing can handle this now, need to update lexing.
- [ ] Parse error not ideal for `\x y z b=> b` (points to lambda)
### Background ### Background
- [ ] Read Ulf Norell thesis - [ ] Read Ulf Norell thesis
- [ ] Finish reading dynamic pattern unification paper to see what is missing/wrong with the current implementation

40
newt/Combinatory.newt Normal file
View File

@@ -0,0 +1,40 @@
module Combinatory
data Unit : U where
MkUnit : Unit
infixr 7 _::_
data List : U -> U where
Nil : {A : U} -> List A
_::_ : {A : U} -> A -> List A -> List A
-- prj/menagerie/papers/combinatory
infixr 6 _~>_
data Type : U where
ι : Type
_~>_ : Type -> Type -> Type
A : U
A = Unit
Val : Type -> U
Val ι = A
Val (x ~> y) = Val x -> Val y
Ctx : U
Ctx = List Type
data Ref : Type -> Ctx -> U where
Z : {σ : Type} {Γ : Ctx} -> Ref σ (σ :: Γ)
S : {σ τ : Type} {Γ : Ctx} -> Ref σ Γ -> Ref σ (τ :: Γ)
data Term : Ctx -> Type -> U where
App : {Γ : Ctx} {σ τ : Type} -> Term Γ (σ ~> τ) -> Term Γ σ -> Term Γ τ
Lam : {Γ : Ctx} {σ τ : Type} -> Term (σ :: Γ) τ -> Term Γ (σ ~> τ)
Var : {Γ : Ctx} {σ : Type} -> Ref σ Γ Term Γ σ
-- FIXME, I'm not getting an error for Nil, but it's shadowing Nil
data Env : Ctx -> U where
ENil : Env Nil
ECons : {Γ : Ctx} {σ : Type} Val σ Env Γ Env (σ :: Γ)

View File

@@ -17,6 +17,7 @@ data Either : U -> U -> U where
infixr 0 _$_ infixr 0 _$_
-- Currently very noisy in generated code
_$_ : {a b : U} -> (a -> b) -> a -> b _$_ : {a b : U} -> (a -> b) -> a -> b
f $ a = f a f $ a = f a

52
src/Lib/Common.idr Normal file
View File

@@ -0,0 +1,52 @@
module Lib.Common
import Data.String
-- I was going to use a record, but we're peeling this off of bounds at the moment.
public export
FC : Type
FC = (Int,Int)
public export
interface HasFC a where
getFC : a -> FC
%name FC fc
export
emptyFC : FC
emptyFC = (0,0)
-- Error of a parse
public export
data Error = E FC String
%name Error err
public export
showError : String -> Error -> String
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
where
go : Int -> List String -> String
go l [] = ""
go l (x :: xs) =
if l == line then
" \{x}\n \{replicate (cast col) ' '}^\n"
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
else go (l + 1) xs
public export
data Fixity = InfixL | InfixR | Infix
export
Show Fixity where
show InfixL = "infixl"
show InfixR = "infixr"
show Infix = "infix"
public export
record OpDef where
constructor MkOp
name : String
prec : Int
fix : Fixity

View File

@@ -95,7 +95,6 @@ pArg = do
(Explicit,fc,) <$> atom (Explicit,fc,) <$> atom
<|> (Implicit,fc,) <$> braces typeExpr <|> (Implicit,fc,) <$> braces typeExpr
<|> (Auto,fc,) <$> dbraces typeExpr <|> (Auto,fc,) <$> dbraces typeExpr
<|> (Explicit,fc,) . RVar fc <$> token Oper
AppSpine = List (Icit,FC,Raw) AppSpine = List (Icit,FC,Raw)
@@ -203,13 +202,6 @@ caseExpr = do
alts <- startBlock $ someSame $ caseAlt alts <- startBlock $ someSame $ caseAlt
pure $ RCase fc sc alts pure $ RCase fc sc alts
doArrow : Parser DoStmt
doArrow = do
fc <- getPos
name <- try $ ident <* keyword "<-"
expr <- term
pure $ DoArrow fc name expr
doStmt : Parser DoStmt doStmt : Parser DoStmt
doStmt doStmt
= DoArrow <$> getPos <*> (try $ ident <* keyword "<-") <*> term = DoArrow <$> getPos <*> (try $ ident <* keyword "<-") <*> term
@@ -232,9 +224,8 @@ varname = (ident <|> uident <|> keyword "_" *> pure "_")
ebind : Parser (List (FC, String, Icit, Raw)) ebind : Parser (List (FC, String, Icit, Raw))
ebind = do ebind = do
sym "(" -- don't commit until we see the ":"
names <- some $ withPos varname names <- try (sym "(" *> some (withPos varname) <* sym ":")
sym ":"
ty <- typeExpr ty <- typeExpr
sym ")" sym ")"
pure $ map (\(pos, name) => (pos, name, Explicit, ty)) names pure $ map (\(pos, name) => (pos, name, Explicit, ty)) names
@@ -262,7 +253,7 @@ arrow = sym "->" <|> sym "→"
-- Collect a bunch of binders (A : U) {y : A} -> ... -- Collect a bunch of binders (A : U) {y : A} -> ...
binders : Parser Raw binders : Parser Raw
binders = do binders = do
binds <- many (abind <|> ibind <|> try ebind) binds <- many (abind <|> ibind <|> ebind)
arrow arrow
scope <- typeExpr scope <- typeExpr
pure $ foldr (uncurry mkBind) scope (join binds) pure $ foldr (uncurry mkBind) scope (join binds)
@@ -286,7 +277,7 @@ typeExpr = binders
export export
parseSig : Parser Decl parseSig : Parser Decl
parseSig = TypeSig <$> getPos <*> some (ident <|> uident) <* keyword ":" <*> typeExpr parseSig = TypeSig <$> getPos <*> try (some (ident <|> uident) <* keyword ":") <*> typeExpr
parseImport : Parser Import parseImport : Parser Import
parseImport = MkImport <$> getPos <* keyword "import" <*> uident parseImport = MkImport <$> getPos <* keyword "import" <*> uident
@@ -364,7 +355,7 @@ parseNorm = DCheck <$> getPos <* keyword "#check" <*> typeExpr <* keyword ":" <*
export export
parseDecl : Parser Decl parseDecl : Parser Decl
parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> (try $ parseSig) <|> parseDef parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> parseSig <|> parseDef
export export

View File

@@ -1,6 +1,7 @@
module Lib.Parser.Impl module Lib.Parser.Impl
import Lib.Token import Lib.Token
import Lib.Common
import Data.String import Data.String
import Data.Nat import Data.Nat
@@ -8,54 +9,6 @@ public export
TokenList : Type TokenList : Type
TokenList = List BTok TokenList = List BTok
public export
data Fixity = InfixL | InfixR | Infix
export
Show Fixity where
show InfixL = "infixl"
show InfixR = "infixr"
show Infix = "infix"
-- I was going to use a record, but we're peeling this off of bounds at the moment.
public export
FC : Type
FC = (Int,Int)
public export
interface HasFC a where
getFC : a -> FC
%name FC fc
export
emptyFC : FC
emptyFC = (0,0)
-- Error of a parse
public export
data Error = E FC String
%name Error err
public export
showError : String -> Error -> String
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
where
go : Int -> List String -> String
go l [] = ""
go l (x :: xs) =
if l == line then
" \{x}\n \{replicate (cast col) ' '}^\n"
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
else go (l + 1) xs
public export
record OpDef where
constructor MkOp
name : String
prec : Int
fix : Fixity
-- Result of a parse -- Result of a parse
public export public export
data Result : Type -> Type where data Result : Type -> Type where

View File

@@ -9,7 +9,6 @@ data Kind
= Ident = Ident
| UIdent | UIdent
| Keyword | Keyword
| Oper
| MixFix | MixFix
| Number | Number
| Character | Character
@@ -29,7 +28,6 @@ Show Kind where
show Ident = "Ident" show Ident = "Ident"
show UIdent = "UIdent" show UIdent = "UIdent"
show Keyword = "Keyword" show Keyword = "Keyword"
show Oper = "Oper"
show MixFix = "MixFix" show MixFix = "MixFix"
show Number = "Number" show Number = "Number"
show Character = "Character" show Character = "Character"
@@ -47,7 +45,6 @@ Eq Kind where
Ident == Ident = True Ident == Ident = True
UIdent == UIdent = True UIdent == UIdent = True
Keyword == Keyword = True Keyword == Keyword = True
Oper == Oper = True
MixFix == MixFix = True MixFix == MixFix = True
Number == Number = True Number == Number = True
Character == Character = True Character == Character = True

View File

@@ -3,10 +3,12 @@ module Lib.Tokenizer
import Text.Lexer import Text.Lexer
import Text.Lexer.Tokenizer import Text.Lexer.Tokenizer
import Lib.Token import Lib.Token
import Lib.Common
keywords : List String keywords : List String
keywords = ["let", "in", "where", "case", "of", "data", "U", "do", keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
"ptype", "pfunc", "module", "infixl", "infixr", "infix"] "ptype", "pfunc", "module", "infixl", "infixr", "infix",
"->", "", ":", "=>", ":=", "=", "<-", "\\", "_"]
specialOps : List String specialOps : List String
specialOps = ["->", ":", "=>", ":=", "=", "<-"] specialOps = ["->", ":", "=>", ":=", "=", "<-"]
@@ -17,18 +19,12 @@ checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
checkUKW : String -> Token Kind checkUKW : String -> Token Kind
checkUKW s = if elem s keywords then Tok Keyword s else Tok UIdent s checkUKW s = if elem s keywords then Tok Keyword s else Tok UIdent s
checkOp : String -> Token Kind
checkOp s = if elem s specialOps then Tok Keyword s else Tok Oper s
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
opChar : Lexer
opChar = pred isOpChar
identMore : Lexer identMore : Lexer
identMore = alphaNum <|> exact "." <|> exact "'" <|> exact "_" identMore = alphaNum <|> exact "." <|> exact "'" <|> exact "_"
singleton : Lexer
singleton = oneOf "()\\{}[],"
quo : Recognise True quo : Recognise True
quo = is '"' quo = is '"'
@@ -52,25 +48,34 @@ opMiddle = pred (\c => not (isSpace c || c == '_'))
rawTokens : Tokenizer (Token Kind) rawTokens : Tokenizer (Token Kind)
rawTokens rawTokens
= match (lower <+> many identMore) checkKW = match spaces (Tok Space)
<|> match (upper <+> many identMore) checkUKW -- { is singleton except for {{
<|> match (some digit) (Tok Number) <|> match (exact "{{" <|> exact "}}") (Tok Keyword)
<|> match (is '#' <+> many alpha) (Tok Pragma) -- need to make this an ident
<|> match charLit (Tok Character) <|> match (exact ",") (checkKW)
-- for now, our lambda slash is singleton
<|> match (singleton) (Tok Symbol)
-- TODO Drop MixFix token type when we support if_then_else_
<|> match (exact "_" <+> (some opMiddle) <+> exact "_") (Tok MixFix) <|> match (exact "_" <+> (some opMiddle) <+> exact "_") (Tok MixFix)
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote) -- REVIEW - expect non-alpha after?
<|> match (some digit) (Tok Number)
-- for module names and maybe type constructors
<|> match (charLit) (Tok Character)
<|> match (is '#' <+> many alpha) (Tok Pragma)
<|> match (lineComment (exact "--")) (Tok Space) <|> match (lineComment (exact "--")) (Tok Space)
<|> match (blockComment (exact "/-") (exact "-/")) (Tok Space) <|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
<|> match (exact ",") (Tok Oper) <|> match (upper <+> many identMore) checkUKW
<|> match (some opChar) checkOp <|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
<|> match (exact "{{" <|> exact "}}") (Tok Keyword) -- accept almost everything, but
<|> match symbol (Tok Symbol) <|> match (some (non (space <|> singleton))) checkKW
<|> match spaces (Tok Space)
notSpace : WithBounds (Token Kind) -> Bool notSpace : WithBounds (Token Kind) -> Bool
notSpace (MkBounded (Tok Space _) _ _) = False notSpace (MkBounded (Tok Space _) _ _) = False
notSpace _ = True notSpace _ = True
export export
tokenise : String -> List BTok tokenise : String -> Either Error (List BTok)
tokenise = filter notSpace . fst . lex rawTokens tokenise s = case lex rawTokens s of
(toks, EndInput, l, c, what) => Right (filter notSpace toks)
(toks, reason, l, c, what) => Left (E (l,c) "\{show reason}")

View File

@@ -1,7 +1,7 @@
module Lib.Types module Lib.Types
-- For FC, Error -- For FC, Error
import public Lib.Parser.Impl import public Lib.Common
import Lib.Prettier import Lib.Prettier
import public Control.Monad.Error.Either import public Control.Monad.Error.Either
@@ -433,7 +433,7 @@ names ctx = toList $ map fst ctx.types
public export public export
M : Type -> Type M : Type -> Type
M = (StateT TopContext (EitherT Impl.Error IO)) M = (StateT TopContext (EitherT Error IO))
||| Force argument and print if verbose is true ||| Force argument and print if verbose is true
export export

View File

@@ -11,7 +11,7 @@ import Data.IORef
-- import Lib.Elab -- import Lib.Elab
import Lib.Compile import Lib.Compile
import Lib.Parser import Lib.Parser
-- import Lib.Parser.Impl import Lib.Parser.Impl
import Lib.Prettier import Lib.Prettier
import Lib.ProcessDecl import Lib.ProcessDecl
import Lib.Token import Lib.Token
@@ -60,7 +60,8 @@ processModule base stk name = do
let fn = base ++ "/" ++ name ++ ".newt" let fn = base ++ "/" ++ name ++ ".newt"
Right src <- readFile $ fn Right src <- readFile $ fn
| Left err => fail (show err) | Left err => fail (show err)
let toks = tokenise src let Right toks = tokenise src
| Left err => fail (showError src err)
let Right (modName, ops, toks) := partialParse parseModHeader top.ops toks let Right (modName, ops, toks) := partialParse parseModHeader top.ops toks
| Left err => fail (showError src err) | Left err => fail (showError src err)

View File

@@ -15,7 +15,7 @@ head (x :: xs) = x
-- These came from a Conor McBride lecture where they use SHE -- These came from a Conor McBride lecture where they use SHE
vapp : {s t: U} {k : Nat} -> Vect k (s -> t) -> Vect k s -> Vect k t vapp : {s t : U} {k : Nat} -> Vect k (s -> t) -> Vect k s -> Vect k t
vapp (f :: fs) (t :: ts) = f t :: vapp fs ts vapp (f :: fs) (t :: ts) = f t :: vapp fs ts
vapp Nil Nil = Nil vapp Nil Nil = Nil
@@ -32,7 +32,7 @@ fmap f (x :: xs) = (f x :: fmap f xs)
pure : {a : U} {n : Nat} -> a -> Vect n a pure : {a : U} {n : Nat} -> a -> Vect n a
pure {a} {n} = vec n pure {a} {n} = vec n
_<*>_ : {s t: U} {k : Nat} -> Vect k (s -> t) -> Vect k s -> Vect k t _<*>_ : {s t : U} {k : Nat} -> Vect k (s -> t) -> Vect k s -> Vect k t
_<*>_ = vapp _<*>_ = vapp
-- and idiom brackets (maybe someday) -- and idiom brackets (maybe someday)

View File

@@ -12,7 +12,7 @@ group1 : {A B : U}(x y z : A) -> B -> B
group1 = \x y z b => b group1 = \x y z b => b
group2 : {A B}(x y z : A) -> B -> B group2 : {A B}(x y z : A) -> B -> B
group2 = \x y z b=> b group2 = \x y z b => b
-- explicit id function used for annotation as in Idris -- explicit id function used for annotation as in Idris
the : (A : _) -> A -> A the : (A : _) -> A -> A