Parsing updates for unicode
- Allow unicode characters in indents and operators - Show lexing errors
This commit is contained in:
52
src/Lib/Common.idr
Normal file
52
src/Lib/Common.idr
Normal file
@@ -0,0 +1,52 @@
|
||||
module Lib.Common
|
||||
|
||||
import Data.String
|
||||
|
||||
-- I was going to use a record, but we're peeling this off of bounds at the moment.
|
||||
public export
|
||||
FC : Type
|
||||
FC = (Int,Int)
|
||||
|
||||
public export
|
||||
interface HasFC a where
|
||||
getFC : a -> FC
|
||||
|
||||
%name FC fc
|
||||
|
||||
export
|
||||
emptyFC : FC
|
||||
emptyFC = (0,0)
|
||||
|
||||
-- Error of a parse
|
||||
public export
|
||||
data Error = E FC String
|
||||
%name Error err
|
||||
|
||||
public export
|
||||
showError : String -> Error -> String
|
||||
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
|
||||
where
|
||||
go : Int -> List String -> String
|
||||
go l [] = ""
|
||||
go l (x :: xs) =
|
||||
if l == line then
|
||||
" \{x}\n \{replicate (cast col) ' '}^\n"
|
||||
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
|
||||
else go (l + 1) xs
|
||||
|
||||
public export
|
||||
data Fixity = InfixL | InfixR | Infix
|
||||
|
||||
export
|
||||
Show Fixity where
|
||||
show InfixL = "infixl"
|
||||
show InfixR = "infixr"
|
||||
show Infix = "infix"
|
||||
|
||||
public export
|
||||
record OpDef where
|
||||
constructor MkOp
|
||||
name : String
|
||||
prec : Int
|
||||
fix : Fixity
|
||||
|
||||
@@ -95,7 +95,6 @@ pArg = do
|
||||
(Explicit,fc,) <$> atom
|
||||
<|> (Implicit,fc,) <$> braces typeExpr
|
||||
<|> (Auto,fc,) <$> dbraces typeExpr
|
||||
<|> (Explicit,fc,) . RVar fc <$> token Oper
|
||||
|
||||
AppSpine = List (Icit,FC,Raw)
|
||||
|
||||
@@ -203,13 +202,6 @@ caseExpr = do
|
||||
alts <- startBlock $ someSame $ caseAlt
|
||||
pure $ RCase fc sc alts
|
||||
|
||||
doArrow : Parser DoStmt
|
||||
doArrow = do
|
||||
fc <- getPos
|
||||
name <- try $ ident <* keyword "<-"
|
||||
expr <- term
|
||||
pure $ DoArrow fc name expr
|
||||
|
||||
doStmt : Parser DoStmt
|
||||
doStmt
|
||||
= DoArrow <$> getPos <*> (try $ ident <* keyword "<-") <*> term
|
||||
@@ -232,9 +224,8 @@ varname = (ident <|> uident <|> keyword "_" *> pure "_")
|
||||
|
||||
ebind : Parser (List (FC, String, Icit, Raw))
|
||||
ebind = do
|
||||
sym "("
|
||||
names <- some $ withPos varname
|
||||
sym ":"
|
||||
-- don't commit until we see the ":"
|
||||
names <- try (sym "(" *> some (withPos varname) <* sym ":")
|
||||
ty <- typeExpr
|
||||
sym ")"
|
||||
pure $ map (\(pos, name) => (pos, name, Explicit, ty)) names
|
||||
@@ -262,7 +253,7 @@ arrow = sym "->" <|> sym "→"
|
||||
-- Collect a bunch of binders (A : U) {y : A} -> ...
|
||||
binders : Parser Raw
|
||||
binders = do
|
||||
binds <- many (abind <|> ibind <|> try ebind)
|
||||
binds <- many (abind <|> ibind <|> ebind)
|
||||
arrow
|
||||
scope <- typeExpr
|
||||
pure $ foldr (uncurry mkBind) scope (join binds)
|
||||
@@ -286,7 +277,7 @@ typeExpr = binders
|
||||
|
||||
export
|
||||
parseSig : Parser Decl
|
||||
parseSig = TypeSig <$> getPos <*> some (ident <|> uident) <* keyword ":" <*> typeExpr
|
||||
parseSig = TypeSig <$> getPos <*> try (some (ident <|> uident) <* keyword ":") <*> typeExpr
|
||||
|
||||
parseImport : Parser Import
|
||||
parseImport = MkImport <$> getPos <* keyword "import" <*> uident
|
||||
@@ -364,7 +355,7 @@ parseNorm = DCheck <$> getPos <* keyword "#check" <*> typeExpr <* keyword ":" <*
|
||||
|
||||
export
|
||||
parseDecl : Parser Decl
|
||||
parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> (try $ parseSig) <|> parseDef
|
||||
parseDecl = parseMixfix <|> parsePType <|> parsePFunc <|> parseNorm <|> parseData <|> parseSig <|> parseDef
|
||||
|
||||
|
||||
export
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
module Lib.Parser.Impl
|
||||
|
||||
import Lib.Token
|
||||
import Lib.Common
|
||||
import Data.String
|
||||
import Data.Nat
|
||||
|
||||
@@ -8,54 +9,6 @@ public export
|
||||
TokenList : Type
|
||||
TokenList = List BTok
|
||||
|
||||
public export
|
||||
data Fixity = InfixL | InfixR | Infix
|
||||
|
||||
export
|
||||
Show Fixity where
|
||||
show InfixL = "infixl"
|
||||
show InfixR = "infixr"
|
||||
show Infix = "infix"
|
||||
|
||||
-- I was going to use a record, but we're peeling this off of bounds at the moment.
|
||||
public export
|
||||
FC : Type
|
||||
FC = (Int,Int)
|
||||
|
||||
public export
|
||||
interface HasFC a where
|
||||
getFC : a -> FC
|
||||
|
||||
%name FC fc
|
||||
|
||||
export
|
||||
emptyFC : FC
|
||||
emptyFC = (0,0)
|
||||
|
||||
-- Error of a parse
|
||||
public export
|
||||
data Error = E FC String
|
||||
%name Error err
|
||||
|
||||
public export
|
||||
showError : String -> Error -> String
|
||||
showError src (E (line, col) msg) = "ERROR at \{show (line,col)}: \{msg}\n" ++ go 0 (lines src)
|
||||
where
|
||||
go : Int -> List String -> String
|
||||
go l [] = ""
|
||||
go l (x :: xs) =
|
||||
if l == line then
|
||||
" \{x}\n \{replicate (cast col) ' '}^\n"
|
||||
else if line - 3 < l then " " ++ x ++ "\n" ++ go (l + 1) xs
|
||||
else go (l + 1) xs
|
||||
|
||||
public export
|
||||
record OpDef where
|
||||
constructor MkOp
|
||||
name : String
|
||||
prec : Int
|
||||
fix : Fixity
|
||||
|
||||
-- Result of a parse
|
||||
public export
|
||||
data Result : Type -> Type where
|
||||
|
||||
@@ -9,7 +9,6 @@ data Kind
|
||||
= Ident
|
||||
| UIdent
|
||||
| Keyword
|
||||
| Oper
|
||||
| MixFix
|
||||
| Number
|
||||
| Character
|
||||
@@ -29,7 +28,6 @@ Show Kind where
|
||||
show Ident = "Ident"
|
||||
show UIdent = "UIdent"
|
||||
show Keyword = "Keyword"
|
||||
show Oper = "Oper"
|
||||
show MixFix = "MixFix"
|
||||
show Number = "Number"
|
||||
show Character = "Character"
|
||||
@@ -47,7 +45,6 @@ Eq Kind where
|
||||
Ident == Ident = True
|
||||
UIdent == UIdent = True
|
||||
Keyword == Keyword = True
|
||||
Oper == Oper = True
|
||||
MixFix == MixFix = True
|
||||
Number == Number = True
|
||||
Character == Character = True
|
||||
|
||||
@@ -3,10 +3,12 @@ module Lib.Tokenizer
|
||||
import Text.Lexer
|
||||
import Text.Lexer.Tokenizer
|
||||
import Lib.Token
|
||||
import Lib.Common
|
||||
|
||||
keywords : List String
|
||||
keywords = ["let", "in", "where", "case", "of", "data", "U", "do",
|
||||
"ptype", "pfunc", "module", "infixl", "infixr", "infix"]
|
||||
"ptype", "pfunc", "module", "infixl", "infixr", "infix",
|
||||
"->", "→", ":", "=>", ":=", "=", "<-", "\\", "_"]
|
||||
|
||||
specialOps : List String
|
||||
specialOps = ["->", ":", "=>", ":=", "=", "<-"]
|
||||
@@ -17,18 +19,12 @@ checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
|
||||
checkUKW : String -> Token Kind
|
||||
checkUKW s = if elem s keywords then Tok Keyword s else Tok UIdent s
|
||||
|
||||
checkOp : String -> Token Kind
|
||||
checkOp s = if elem s specialOps then Tok Keyword s else Tok Oper s
|
||||
|
||||
isOpChar : Char -> Bool
|
||||
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
|
||||
|
||||
opChar : Lexer
|
||||
opChar = pred isOpChar
|
||||
|
||||
identMore : Lexer
|
||||
identMore = alphaNum <|> exact "." <|> exact "'" <|> exact "_"
|
||||
|
||||
singleton : Lexer
|
||||
singleton = oneOf "()\\{}[],"
|
||||
|
||||
quo : Recognise True
|
||||
quo = is '"'
|
||||
|
||||
@@ -52,25 +48,34 @@ opMiddle = pred (\c => not (isSpace c || c == '_'))
|
||||
|
||||
rawTokens : Tokenizer (Token Kind)
|
||||
rawTokens
|
||||
= match (lower <+> many identMore) checkKW
|
||||
<|> match (upper <+> many identMore) checkUKW
|
||||
<|> match (some digit) (Tok Number)
|
||||
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
||||
<|> match charLit (Tok Character)
|
||||
= match spaces (Tok Space)
|
||||
-- { is singleton except for {{
|
||||
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
||||
-- need to make this an ident
|
||||
<|> match (exact ",") (checkKW)
|
||||
-- for now, our lambda slash is singleton
|
||||
<|> match (singleton) (Tok Symbol)
|
||||
-- TODO Drop MixFix token type when we support if_then_else_
|
||||
<|> match (exact "_" <+> (some opMiddle) <+> exact "_") (Tok MixFix)
|
||||
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
||||
-- REVIEW - expect non-alpha after?
|
||||
<|> match (some digit) (Tok Number)
|
||||
-- for module names and maybe type constructors
|
||||
<|> match (charLit) (Tok Character)
|
||||
<|> match (is '#' <+> many alpha) (Tok Pragma)
|
||||
<|> match (lineComment (exact "--")) (Tok Space)
|
||||
<|> match (blockComment (exact "/-") (exact "-/")) (Tok Space)
|
||||
<|> match (exact ",") (Tok Oper)
|
||||
<|> match (some opChar) checkOp
|
||||
<|> match (exact "{{" <|> exact "}}") (Tok Keyword)
|
||||
<|> match symbol (Tok Symbol)
|
||||
<|> match spaces (Tok Space)
|
||||
<|> match (upper <+> many identMore) checkUKW
|
||||
<|> match (quo <+> manyUntil quo (esc any <|> any) <+> quo) (Tok StringKind . unquote)
|
||||
-- accept almost everything, but
|
||||
<|> match (some (non (space <|> singleton))) checkKW
|
||||
|
||||
notSpace : WithBounds (Token Kind) -> Bool
|
||||
notSpace (MkBounded (Tok Space _) _ _) = False
|
||||
notSpace _ = True
|
||||
|
||||
export
|
||||
tokenise : String -> List BTok
|
||||
tokenise = filter notSpace . fst . lex rawTokens
|
||||
tokenise : String -> Either Error (List BTok)
|
||||
tokenise s = case lex rawTokens s of
|
||||
(toks, EndInput, l, c, what) => Right (filter notSpace toks)
|
||||
(toks, reason, l, c, what) => Left (E (l,c) "\{show reason}")
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
module Lib.Types
|
||||
|
||||
-- For FC, Error
|
||||
import public Lib.Parser.Impl
|
||||
import public Lib.Common
|
||||
import Lib.Prettier
|
||||
|
||||
import public Control.Monad.Error.Either
|
||||
@@ -433,7 +433,7 @@ names ctx = toList $ map fst ctx.types
|
||||
|
||||
public export
|
||||
M : Type -> Type
|
||||
M = (StateT TopContext (EitherT Impl.Error IO))
|
||||
M = (StateT TopContext (EitherT Error IO))
|
||||
|
||||
||| Force argument and print if verbose is true
|
||||
export
|
||||
|
||||
Reference in New Issue
Block a user