2022-09-08 22:45:07 -07:00
commit 39deff1465
13 changed files with 767 additions and 0 deletions

src/Lib/Grammar.idr Normal file

@@ -0,0 +1,45 @@
module Lib.Grammar
-- Derived from Parser/Core.idr - revisit later when we want to be stack safe
-- For simplicity (for now), it drops the consumed flag from the type, which I think was
-- there for totality checking. May want it back someday.
data Grammar : (state : Type) -> (tok : Type) -> Type -> Type where
EOF : Grammar state tok ()
Empty : a -> Grammar state tok a
Fail : String -> Grammar state tok a
Commit : Grammar state tok ()
Alt : Grammar state tok ty -> Lazy (Grammar state tok ty) -> Grammar state tok ty
Seq : Grammar state tok a -> (a -> Grammar state tok b) -> Grammar state tok b
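For reference, the consumed flag dropped above is a Bool index on the upstream type, which is what lets the totality checker see that a consuming sequence makes progress. Restoring it would look roughly like the sketch below (from memory, not the exact Parser/Core.idr definition; upstream also splits Seq into eating and non-eating variants, and the primes are only to keep the sketch apart from the real declaration above):

data Grammar' : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
  EOF'    : Grammar' state tok False ()
  Empty'  : a -> Grammar' state tok False a
  Fail'   : String -> Grammar' state tok c a
  Commit' : Grammar' state tok False ()
  Alt'    : Grammar' state tok c1 ty -> Lazy (Grammar' state tok c2 ty) ->
            Grammar' state tok (c1 && c2) ty
  Seq'    : Grammar' state tok c1 a -> (a -> Grammar' state tok c2 b) ->
            Grammar' state tok (c1 || c2) b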
Functor (Grammar state tok) where
map f (Fail str) = Fail str
map f (Alt x y) = Alt (map f x) (map f y)
map f (Seq x g) = Seq x (map f . g)
map f (Empty v) = Empty (f v)
-- Empty / EOF
map f p = Seq p (Empty . f)
Applicative (Grammar state tok) where
pure a = Empty a
x <*> y = Seq x (\f => map f y)
Alternative (Grammar state tok) where
empty = Fail "empty"
(<|>) = Alt
Monad (Grammar state tok) where
(>>=) = Seq
ParseError : Type
data ParseResult : Type -> Type -> Type -> Type where
Failure : (commit : Bool) -> List ParseError -> ParseResult state tok ty
doParse : Semigroup state =>
state ->
(commit : Bool) ->
Grammar state tok ty ->
List tok ->
ParseResult state tok ty
doParse = ?todo

src/Lib/Layout.idr Normal file

@@ -0,0 +1,48 @@
module Lib.Layout
import Lib.Token
import Text.Lexer
-- Dunno if I'll do layout or pass col, but here it is:
intro : List String
intro = [ "where", "let", "of" ]
Position : Type
Position = (Int,Int)
isIntro : BTok -> Bool
isIntro t = elem (value t) intro
export
layout : List BTok -> List BTok
layout = go [] []
where
lbrace = irrelevantBounds (Tok LBrace "{")
semi = irrelevantBounds (Tok Semi ";")
rbrace = irrelevantBounds (Tok RBrace "}")
-- go' does start of block
go' : List BTok -> List Position -> List BTok -> List BTok
-- go emits semicolons and closes blocks; an "in" closes a let block when it sits on the block's starting line, otherwise it (like any other token) must be outdented to close the block
go : List BTok -> List Position -> List BTok -> List BTok
go acc [] [] = reverse acc
-- close any blocks still open at EOF
go acc (_ :: lvls) [] = go (rbrace :: acc) lvls []
go acc [] ts = go' acc [] ts
go acc lvls@(lvl::rest) toks@(t :: ts) =
let (sr,sc) = start t in
if snd lvl > sc then go (rbrace :: acc) rest toks
else if snd lvl == sc then go' (semi :: acc) lvls toks
else if value t == "in" && sr == fst lvl then go' (rbrace :: acc) rest toks
else go' acc lvls toks
-- handle start of block
go' acc lvls [] = reverse acc
go' acc lvls (t::ts) = case isIntro t of
False => go (t::acc) lvls ts
True => case ts of
t'::ts' => go (t' :: lbrace :: t :: acc) (start t' :: lvls) ts'
[] => go (rbrace :: lbrace :: t :: acc) lvls ts
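As a worked example of what the above is aiming for (alignment matters: y sits directly under x and in is outdented; tokenise is from Lib.Tokenizer), the token texts after layout should read:

-- map value (layout (tokenise "let x = 1\n    y = 2\nin x + y"))
--   ==> ["let", "{", "x", "=", "1", ";", "y", "=", "2", "}", "in", "x", "+", "y"]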

src/Lib/Lexer2.idr Normal file

@@ -0,0 +1,56 @@
module Lib.Lexer2
data UToken
= Ident String
| Let
| In
| Data
| Where
| Forall
| Case
| Of
| Underscore
| Operator String
| Equals
| Dot
| Colon
| Pipe
| RightArrow
| FatArrow
| Number Int
| Lambda
| LeftParen
| RightParen
| LeftBrace
| RightBrace
| Error
-- we can skip this if we keep the text...
Show UToken where
show = \case
Ident name => name
Let => "let"
In => "in"
Data => "data"
Where => "where"
Forall => "forall"
Case => "case"
Of => "of"
Underscore => "_"
Operator name => name
Equals => "="
Dot => "."
Colon => ":"
Pipe => "|"
RightArrow => "->"
FatArrow => "=>"
Number i => show i
Lambda => "\\"
LeftParen => "("
RightParen => ")"
LeftBrace => "{"
RightBrace => "}"
Error => "<error>"

src/Lib/Parser.idr Normal file

@@ -0,0 +1,81 @@
module Lib.Parser
import Lib.Token
import Lib.Parser.Impl
import Syntax
-- There is the whole core vs surface question here.
-- It might be best to do core first. Technically we don't
-- _need_ a parser, but one would be useful for testing.
-- Look to pi-forall / ezoo, but I think we start with a
-- TTImp-level grammar, then add a more sugared layer above
-- (so holes and all that).
-- After the parser runs (see below), take a break and finish the pi-forall
-- exercises. There is some fill-in-the-parser material there that may hint
-- at where this is going.
ident : Parser String
ident = token Ident
term : Parser Term
letExpr : Parser Term
letExpr = Let <$ keyword "let" <*> ident <* keyword "=" <*> term <* keyword "in" <*> term
pattern : Parser Pattern
lamExpr : Parser Term
lamExpr = do
keyword "\\"
commit
name <- pattern
keyword "."
scope <- term
pure $ Lam name scope
caseAlt : Parser CaseAlt
caseExpr : Parser Term
caseExpr = do
_ <- keyword "case"
commit
sc <- term
keyword "of"
alts <- startBlock $ someSame $ caseAlt
pure $ Case sc alts
-- TODO - get this up and running with a case expr to test it
{-
So let's say we wanted to do the indent the hard way.
What does this look like? We want to say:
kw 'let' $ indentedSome assignment
Now assignment is going to be whatever, but we need to make sure its initial token
can/must be at the current indentation level.
Note Idris's indent state is not a plain Col; it's AnyIndent | AtPos Int | AfterPos Int.
The plain-column idea is more like sixty's.
sixten is sameCol, dropAnchor, with an "environment".
sixty's Position is a line and a col, and a token is accepted when either the line
matches or col < tokenCol. That's maybe doable.. (a rough sketch of that check follows)
-}
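A minimal sketch of that sixty-style acceptance check as a standalone predicate (the name and argument layout here are mine, not sixty's):

-- anchor is the (line, col) recorded when a block is opened;
-- (tokLine, tokCol) is where the next token starts.
sixtyOK : (anchor : (Int, Int)) -> (tokLine : Int) -> (tokCol : Int) -> Bool
sixtyOK (line, col) tokLine tokCol = tokLine == line || col < tokCol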

src/Lib/Parser/Impl.idr Normal file

@@ -0,0 +1,139 @@
module Lib.Parser.Impl
import Lib.Token
public export
TokenList : Type
TokenList = List BTok
-- Error of a parse
public export
data Error = E String
%name Error err
-- Result of a parse
public export
data Result : Type -> Type where
OK : a -> (toks : TokenList) -> (com : Bool) -> Result a
Fail : Error -> (toks : TokenList) -> (com : Bool) -> Result a
export
Functor Result where
map f (OK a toks com ) = OK (f a) toks com
map _ (Fail err toks com) = Fail err toks com
-- So sixty just has a newtype function here now (probably for perf).
-- A record might be more ergonomic, but would require a record impl before
-- self hosting.
-- We keep the line and column for indents. The idea being that we check
-- either the col < tokCol or line == tokLine, enabling sameLevel.
-- dunno why I'm making that a pair..
export
data Parser a = P (TokenList -> Bool -> (lc : (Int,Int)) -> Result a)
runP : Parser a -> TokenList -> Bool -> (Int,Int) -> Result a
runP (P f) = f
fail : String -> Parser a
fail msg = P $ \toks,com,col => Fail (E msg) toks com
export
Functor Parser where
map f (P pa) = P $ \ toks, com, col => map f (pa toks com col)
export
Applicative Parser where
pure pa = P (\ toks, com, col => OK pa toks com)
P pab <*> P pa = P $ \toks,com,col =>
case pab toks com col of
Fail err toks com => Fail err toks com
OK f toks com =>
case pa toks com col of
(OK x toks com) => OK (f x) toks com
(Fail err toks com) => Fail err toks com
export
Alternative Parser where
empty = fail "empty"
(P pa) <|> (P pb) = P $ \toks,com,col =>
case pa toks com col of
f@(Fail _ _ com') => if com' then f else pb toks com col
t => t
export
Monad Parser where
P pa >>= pab = P $ \toks,com,col =>
case pa toks com col of
(OK a toks com) => runP (pab a) toks com col
(Fail err xs x) => Fail err xs x
-- do we need this?
pred : (BTok -> Bool) -> String -> Parser String
pred f msg = P $ \toks,com,col =>
case toks of
(t :: ts) => if f t then OK (value t) ts com else Fail (E msg) toks com
[] => Fail (E "eof") toks com
export
commit : Parser ()
commit = P $ \toks,com,col => OK () toks True
export
token : Kind -> Parser String
token k = pred (\t => t.val.kind == k) "Expected a \{show k}"
export
keyword : String -> Parser ()
keyword kw = ignore $ pred (\t => t.val.text == kw) "Expected \{kw}"
many : Parser a -> Parser (List a)
some : Parser a -> Parser (List a)
some p = (::) <$> p <*> many p
many p = some p <|> pure []
-- sixty let has some weird CPS stuff, but essentially:
-- case token_ of
-- Lexer.LLet -> PLet <$> blockOfMany let_ <* token Lexer.In <*> term
-- withIndentationBlock - sets the col
||| Start an indented block and run parser in it
export
startBlock : Parser a -> Parser a
startBlock (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t :: ts) =>
let (tl,tc) = start t
in p toks com (tl,tc)
||| Assert that the next token starts at our current column, then update the
||| stored line to that token's line so the rest of the construct can continue on it
export
sameLevel : Parser a -> Parser a
sameLevel (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t :: ts) =>
let (tl,tc) = start t
in if tc == c then p toks com (tl, c)
else if c < tc then Fail (E "unexpected indent") toks com
else Fail (E "unexpected outdent") toks com
export
someSame : Parser a -> Parser (List a)
someSame = some . sameLevel
||| Require the next token to be indented past the current column (or to sit on the current line)
indented : Parser a -> Parser a
indented (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t::ts) =>
let (tl,tc) = start t
in if tc > c || tl == l then p toks com (l,c)
else Fail (E "unexpected outdent") toks com
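Putting the combinators in this file together, the sixty-style let from the comment above would presumably come out like this commented sketch (letBlock, assign, term, Name and Term are hypothetical here, only to show the intended shape):

-- letBlock : Parser (List (Name, Term), Term)
-- letBlock = do
--   keyword "let"
--   commit
--   binds <- startBlock (someSame assign)
--   keyword "in"
--   body <- term
--   pure (binds, body)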

src/Lib/Test.idr Normal file

@@ -0,0 +1,23 @@
import Text.Lexer
data Kind = Ignore | Ident | Oper | Number
ignore : WithBounds (Token Kind) -> Bool
ignore (MkBounded (Tok Ignore _) _ _) = True
ignore _ = False
tmap : TokenMap (Token Kind)
tmap = [
(alpha <+> many alphaNum, Tok Ident),
(some digit, Tok Number),
(some symbol, Tok Oper),
(spaces, Tok Ignore)
]
show : WithBounds (Token Kind) -> String
show t@(MkBounded (Tok _ v) _ _) = "\{show $ start t} \{show v}"
main : IO ()
main = do
let toks = filter (not . ignore) $ fst $ lex tmap "let x = 1\n y = 2\n in x + y"
traverse_ (putStrLn . show) toks
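For orientation, the first few lines this prints should look roughly like the following (a sketch that assumes Text.Lexer reports 0-based lines and columns):

-- (0, 0) "let"
-- (0, 4) "x"
-- (0, 6) "="
-- (0, 8) "1"
-- (1, 1) "y"
-- ...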

src/Lib/Tok2.idr Normal file

@@ -0,0 +1,55 @@
module Lib.Tok2
import Text.Bounded
data Kind
= Ident
| Keyword
| Oper
| Number
| Symbol
| Arrow
| Ignore
| EOF
data Token = Tok Kind String
Show Kind where
show Ident = "Ident"
show Keyword = "Keyword"
show Oper = "Oper"
show Number = "Number"
show Symbol = "Symbol"
show Arrow = "Arrow"
show Ignore = "Ignore"
show EOF = "EOF"
Show Token where show (Tok k v) = "<\{show k}:\{show v}>"
keywords : List String
keywords = ["let", "in", "where", "case"]
specialOps : List String
specialOps = ["->", ":"]
checkKW : String -> Token
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
nextToken : Int -> Int -> List Char -> Maybe (Token, (Int, Int, List Char))
nextToken row col xs = case xs of
[] => Nothing
(' ' :: cs) => nextToken row (col + 1) cs
(c :: cs) =>
if isDigit c then getTok (Tok Number) isDigit [c] cs
else if isOpChar c then getTok (\s => Tok (opkind s) s) isOpChar [c] cs
else ?Foo
where
getTok : (String -> Token) -> (Char -> Bool) -> List Char -> List Char -> Maybe (Token, (Int, Int, List Char))

src/Lib/Token.idr Normal file

@@ -0,0 +1,55 @@
module Lib.Token
import public Text.Lexer
public export
data Kind
= Ident
| Keyword
| Oper
| Number
| Symbol
| Arrow
| Space
| LBrace
| Semi
| RBrace
export
Show Kind where
show Ident = "Ident"
show Keyword = "Keyword"
show Oper = "Oper"
show Number = "Number"
show Symbol = "Symbol"
show Arrow = "Arrow"
show Space = "Space"
show LBrace = "LBrace"
show Semi = "Semi"
show RBrace = "RBrace"
export
Eq Kind where
Ident == Ident = True
Keyword == Keyword = True
Oper == Oper = True
Number == Number = True
Symbol == Symbol = True
Arrow == Arrow = True
Space == Space = True
LBrace == LBrace = True
Semi == Semi = True
RBrace == RBrace = True
_ == _ = False
export
Show k => Show (Token k) where
show (Tok k v) = "<\{show k}:\{show v}>"
public export
BTok : Type
BTok = WithBounds (Token Kind)
export
value : BTok -> String
value (MkBounded (Tok _ s) _ _) = s
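value just projects the text back out of a bounded token, e.g. (irrelevantBounds is from Text.Bounded):

-- value (irrelevantBounds (Tok Keyword "let")) == "let"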

src/Lib/Tokenizer.idr Normal file

@@ -0,0 +1,54 @@
module Lib.Tokenizer
import Text.Lexer
import Text.Lexer.Tokenizer
import Lib.Token
keywords : List String
keywords = ["let", "in", "where", "case", "of", "data"]
specialOps : List String
specialOps = ["->", ":"]
checkKW : String -> Token Kind
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
opChar : Lexer
opChar = pred isOpChar
-- Text.Lexer.Core.lex was broken for this (hence the unused TokenMap below); using Text.Lexer.Tokenizer instead
-- tmap : TokenMap (Token Kind)
-- tmap = [
-- (alpha <+> many alphaNum, checkKW),
-- (some digit, Tok Number),
-- (some opChar, \s => Tok (opkind s) s),
-- (lineComment (exact "--"), Tok Space),
-- (symbol, Tok Symbol),
-- (spaces, Tok Space)
-- ]
rawTokens : Tokenizer (Token Kind)
rawTokens
= match (alpha <+> many alphaNum) checkKW
<|> match (some digit) (Tok Number)
<|> match (some opChar) (\s => Tok (opkind s) s)
<|> match (lineComment (exact "--")) (Tok Space)
<|> match symbol (Tok Symbol)
<|> match spaces (Tok Space)
notSpace : WithBounds (Token Kind) -> Bool
notSpace (MkBounded (Tok Space _) _ _) = False
notSpace _ = True
export
tokenise : String -> List BTok
tokenise = filter notSpace . fst . lex rawTokens
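A rough illustration of what tokenise yields (token texts only; the kinds come out as Keyword, Ident, Oper, Number, and Space tokens are filtered):

-- map value (tokenise "let x = 1") ==> ["let", "x", "=", "1"]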

src/Lib/Tokenizer2.idr Normal file

@@ -0,0 +1,78 @@
module Lib.Tokenizer2
-- Starting to write a from-scratch tokenizer to flatten out the data a little.
data Position = Pos Int Int
inc : Position -> Position
inc (Pos l c) = Pos l (c + 1)
public export
data Kind
= Ident
| Keyword
| Oper
| Comment
| Number
| Symbol
| Arrow
-- virtual stuff deprecated?
| LBrace
| Semi
| RBrace
| EOF
data Token = Tok Int Int Kind String
data Lexer
= Match (Char -> Bool)
| Seq Lexer
keywords : List String
keywords = ["let", "in", "where", "case", "of", "data"]
specialOps : List String
specialOps = ["->", ":"]
-- checkKW : String -> Token Kind
-- checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
-- opChar : Lexer
-- opChar = pred isOpChar
token : Int -> Int -> List Char -> (Token, List Char)
token l c cs = case cs of
[] => (Tok l c EOF "",[])
('-' :: '-' :: cs) => scan (/= '\n') ['-','-'] l (c+2) cs
_ => ?fixme
where
scan : (Char -> Bool) -> (List Char) -> Int -> Int -> List Char -> (Token,List Char)
-- match could be a Maybe thing and use the maybe Alt impl.
-- Not sure what shape I want here. Maybe a straight up Character Parser and drop tokenization.
-- rawTokens : Tokenizer (Token Kind)
-- rawTokens
-- = match (alpha <+> many alphaNum) checkKW
-- <|> match (some digit) (Tok Number)
-- <|> match (some opChar) (\s => Tok (opkind s) s)
-- <|> match (lineComment (exact "--")) (Tok Space)
-- <|> match symbol (Tok Symbol)
-- <|> match spaces (Tok Space)
-- export
-- tokenise : String -> List BTok
-- tokenise = filter notSpace . fst . lex rawTokens
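One possible shape for the scan helper declared above, kept as a commented sketch since the design isn't settled: it assumes every scan produces a Comment token and stores the position where the scan stops, both of which are guesses:

-- scan keep acc line col (x :: xs) =
--   if keep x then scan keep (acc ++ [x]) line (col + 1) xs
--             else (Tok line col Comment (pack acc), x :: xs)
-- scan keep acc line col [] = (Tok line col Comment (pack acc), [])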

src/Main.idr Normal file

@@ -0,0 +1,28 @@
module Main
import Lib.Tokenizer
import Lib.Layout
import Lib.Token
src = "let x = 1\n y = 2\n in x + y"
check : String -> IO ()
check src = do
putStrLn "--"
printLn $ src
let toks = tokenise src
printLn $ toks
let toks2 = layout toks
printLn $ map value toks2
main : IO ()
main = do
-- This stuff is working with layout; should I bother with col?
-- The downside is that layout is somewhat picky about the indentation of `in`.
-- The sixty approach looks promising. I might want my own tokenizer though.
check "let x = 1\n y = 2\n in x + y"
check "let x = 1 in x + 2"
check "case x of\n True => something\n False => let\n x = 1\n y = 2\n in x + y"

src/Syntax.idr Normal file

@@ -0,0 +1,58 @@
module Syntax
-- Good enough start, let's parse
-- This is informed by pi-forall and others and is somewhat low level
Name : Type
Name = String
data Term : Type -- forward declaration; the full definition is below

TyTerm : Type
TyTerm = Term
data Literal = LString String | LInt Int | LBool Bool
data RigCount = Rig0 | RigW
data Plicity = Implicit | Explicit | Eq
public export
data Pattern
= PatVar Name
| PatCon Name (List (Pattern, RigCount))
| PatWild
| PatLit Literal
-- could be a pair, but I suspect stuff will be added?
public export
data CaseAlt = MkAlt Pattern Term
public export
data Term
= Var Name
| Ann Term TyTerm
| Lit Literal
| Let Name Term Term
| Pi Name Plicity Term Term
| App Term Term
| Lam Pattern Term
| Case Term (List CaseAlt)
| Wildcard
| ParseError String
-- derive some stuff - I'd like json, eq, show, ...
data Decl : Type -- forward declaration; the full definition is below

Telescope : Type
Telescope = List Decl -- pi-forall, always typeSig?
data ConstrDef = MkCDef Name Telescope
data Decl
= TypeSig Name RigCount TyTerm
| Def Name Term
| Data Name Telescope (List ConstrDef)
record Module where
constructor MkModule
name : Name
imports : List Name
decls : List Decl
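As a quick sanity check of the shape, the surface term let x = 1 in x + y would come out roughly as below (exampleTerm is illustrative only; it assumes application is left-nested and that + is just a Var):

exampleTerm : Term
exampleTerm = Let "x" (Lit (LInt 1)) (App (App (Var "+") (Var "x")) (Var "y"))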