chkpt
This commit is contained in:
47
newt.ipkg
Normal file
47
newt.ipkg
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package newt
|
||||||
|
version = 0.1.0
|
||||||
|
authors = "Steve Dunham"
|
||||||
|
-- maintainers =
|
||||||
|
-- license =
|
||||||
|
-- brief =
|
||||||
|
-- readme =
|
||||||
|
-- homepage =
|
||||||
|
-- sourceloc =
|
||||||
|
-- bugtracker =
|
||||||
|
|
||||||
|
-- the Idris2 version required (e.g. langversion >= 0.5.1)
|
||||||
|
-- langversion
|
||||||
|
|
||||||
|
-- packages to add to search path
|
||||||
|
depends = contrib
|
||||||
|
|
||||||
|
-- modules to install
|
||||||
|
-- modules =
|
||||||
|
|
||||||
|
-- main file (i.e. file to load at REPL)
|
||||||
|
main = Main
|
||||||
|
|
||||||
|
-- name of executable
|
||||||
|
executable = "newt"
|
||||||
|
-- opts =
|
||||||
|
sourcedir = "src"
|
||||||
|
-- builddir =
|
||||||
|
-- outputdir =
|
||||||
|
|
||||||
|
-- script to run before building
|
||||||
|
-- prebuild =
|
||||||
|
|
||||||
|
-- script to run after building
|
||||||
|
-- postbuild =
|
||||||
|
|
||||||
|
-- script to run after building, before installing
|
||||||
|
-- preinstall =
|
||||||
|
|
||||||
|
-- script to run after installing
|
||||||
|
-- postinstall =
|
||||||
|
|
||||||
|
-- script to run before cleaning
|
||||||
|
-- preclean =
|
||||||
|
|
||||||
|
-- script to run after cleaning
|
||||||
|
-- postclean =
|
||||||
45
src/Lib/Grammar.idr
Normal file
45
src/Lib/Grammar.idr
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
module Lib.Grammar
|
||||||
|
|
||||||
|
-- Derived from Parser/Core.idr - revisit later when we want to be stack safe
|
||||||
|
-- For simplicity (for now), it drops consumed flag from the type, which I think was
|
||||||
|
-- there for totality checking. May want it back someday.
|
||||||
|
|
||||||
|
data Grammar : (state : Type) -> (tok : Type) -> Type -> Type where
|
||||||
|
EOF : Grammar state tok ()
|
||||||
|
Empty : a -> Grammar state tok a
|
||||||
|
Fail : String -> Grammar state tok a
|
||||||
|
Commit : Grammar state tok ()
|
||||||
|
Alt : Grammar state tok ty -> Lazy (Grammar state tok ty) -> Grammar state tok ty
|
||||||
|
Seq : Grammar state tok a -> (a -> Grammar state tok b) -> Grammar state tok b
|
||||||
|
|
||||||
|
Functor (Grammar state tok) where
|
||||||
|
map f (Fail str) = Fail str
|
||||||
|
map f (Alt x y) = Alt (map f x) (map f y)
|
||||||
|
map f (Seq x g) = ?todo_4
|
||||||
|
map f (Empty v) = Empty (f v)
|
||||||
|
-- Empty / EOF
|
||||||
|
map f p = Seq p (Empty . f)
|
||||||
|
|
||||||
|
Applicative (Grammar state tok) where
|
||||||
|
pure a = Empty a
|
||||||
|
x <*> y = ?hole3
|
||||||
|
|
||||||
|
Alternative (Grammar state tok) where
|
||||||
|
empty = ?hole2
|
||||||
|
(<|>) = Alt
|
||||||
|
|
||||||
|
Monad (Grammar state tok) where
|
||||||
|
(>>=) = Seq
|
||||||
|
|
||||||
|
ParseError : Type
|
||||||
|
|
||||||
|
data ParseResult : Type -> Type -> Type -> Type where
|
||||||
|
Failure : (commit : Bool) -> List ParseError -> ParseResult state tok ty
|
||||||
|
|
||||||
|
doParse : Semigroup state =>
|
||||||
|
state ->
|
||||||
|
(commit : Bool) ->
|
||||||
|
Grammar state tok ty ->
|
||||||
|
List tok ->
|
||||||
|
ParseResult state tok ty
|
||||||
|
doParse = ?todo
|
||||||
48
src/Lib/Layout.idr
Normal file
48
src/Lib/Layout.idr
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
module Lib.Layout
|
||||||
|
|
||||||
|
import Lib.Token
|
||||||
|
import Text.Lexer
|
||||||
|
|
||||||
|
-- Dunno if I'll do layout or pass col, but here it is:
|
||||||
|
|
||||||
|
intro : List String
|
||||||
|
intro = [ "where", "let", "of" ]
|
||||||
|
|
||||||
|
Position : Type
|
||||||
|
Position = (Int,Int)
|
||||||
|
|
||||||
|
isIntro : BTok -> Bool
|
||||||
|
isIntro t = elem (value t) intro
|
||||||
|
|
||||||
|
export
|
||||||
|
layout : List BTok -> List BTok
|
||||||
|
layout = go [] []
|
||||||
|
where
|
||||||
|
lbrace = irrelevantBounds (Tok LBrace "{")
|
||||||
|
semi = irrelevantBounds (Tok Semi ";")
|
||||||
|
rbrace = irrelevantBounds (Tok RBrace "}")
|
||||||
|
|
||||||
|
-- go' does start of block
|
||||||
|
go' : List BTok -> List Position -> List BTok -> List BTok
|
||||||
|
|
||||||
|
-- go does semi and end of block, we require "in" to be on separate, outdented line if not the same line as let
|
||||||
|
go : List BTok -> List Position -> List BTok -> List BTok
|
||||||
|
go acc lvls [] = reverse acc
|
||||||
|
go acc [] ts = go' acc [] ts
|
||||||
|
go acc lvls@(lvl::rest) toks@(t :: ts) =
|
||||||
|
let (sr,sc) = start t in
|
||||||
|
if snd lvl > sc then go (rbrace :: acc) rest toks
|
||||||
|
else if snd lvl == sc then go' (semi :: acc) lvls toks
|
||||||
|
else if value t == "in" && sr == fst lvl then go' (rbrace :: acc) rest toks
|
||||||
|
else go' acc lvls toks
|
||||||
|
|
||||||
|
-- handle start of block
|
||||||
|
go' acc lvls [] = reverse acc
|
||||||
|
go' acc lvls (t::ts) = case isIntro t of
|
||||||
|
False => go (t::acc) lvls ts
|
||||||
|
True => case ts of
|
||||||
|
t'::ts' => go (t' :: lbrace :: t :: acc) (start t' :: lvls) ts'
|
||||||
|
[] => go (rbrace :: lbrace :: t :: acc) lvls ts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
56
src/Lib/Lexer2.idr
Normal file
56
src/Lib/Lexer2.idr
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
module Lib.Lexer2
|
||||||
|
|
||||||
|
data UToken
|
||||||
|
= Ident String
|
||||||
|
| Let
|
||||||
|
| In
|
||||||
|
| Data
|
||||||
|
| Where
|
||||||
|
| Forall
|
||||||
|
| Case
|
||||||
|
| Of
|
||||||
|
| Underscore
|
||||||
|
| Operator String
|
||||||
|
| Equals
|
||||||
|
| Dot
|
||||||
|
| Colon
|
||||||
|
| Pipe
|
||||||
|
| RightArrow
|
||||||
|
| FatArrow
|
||||||
|
| Number Int
|
||||||
|
| Lambda
|
||||||
|
| LeftParen
|
||||||
|
| RightParen
|
||||||
|
| LeftBrace
|
||||||
|
| RightBrace
|
||||||
|
| Error
|
||||||
|
|
||||||
|
-- we can skip this if we keep the text...
|
||||||
|
|
||||||
|
Show UToken where
|
||||||
|
show = \case
|
||||||
|
Ident name => name
|
||||||
|
Let => "let"
|
||||||
|
In => "in"
|
||||||
|
Data => "data"
|
||||||
|
Where => "where"
|
||||||
|
Forall => "forall"
|
||||||
|
Case => "case"
|
||||||
|
Of => "of"
|
||||||
|
Underscore => "_"
|
||||||
|
Operator name => name
|
||||||
|
Equals => ?rhs_10
|
||||||
|
Dot => ?rhs_11
|
||||||
|
Colon => ?rhs_12
|
||||||
|
Pipe => ?rhs_13
|
||||||
|
RightArrow => ?rhs_14
|
||||||
|
FatArrow => ?rhs_15
|
||||||
|
Number i => show i
|
||||||
|
Lambda => ?rhs_17
|
||||||
|
LeftParen => ?rhs_18
|
||||||
|
RightParen => ?rhs_19
|
||||||
|
LeftBrace => ?rhs_20
|
||||||
|
RightBrace => ?rhs_21
|
||||||
|
Error => ?rhs_22
|
||||||
|
|
||||||
|
|
||||||
81
src/Lib/Parser.idr
Normal file
81
src/Lib/Parser.idr
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
module Lib.Parser
|
||||||
|
|
||||||
|
|
||||||
|
import Lib.Token
|
||||||
|
import Lib.Parser.Impl
|
||||||
|
import Syntax
|
||||||
|
|
||||||
|
-- There is the whole core vs surface thing here.
|
||||||
|
-- might be best to do core first/ Technically don't
|
||||||
|
-- _need_ a parser, but would be useful for testing.
|
||||||
|
|
||||||
|
-- look to pi-forall / ezoo, but I think we start with a
|
||||||
|
-- TTImpl level grammar, then add a more sugared layer above
|
||||||
|
-- so holes and all that
|
||||||
|
|
||||||
|
-- After the parser runs, see below, take a break and finish pi-forall
|
||||||
|
-- exercises. There is some fill in the parser stuff that may show
|
||||||
|
-- the future.
|
||||||
|
|
||||||
|
ident = token Ident
|
||||||
|
|
||||||
|
term : Parser Term
|
||||||
|
|
||||||
|
letExpr : Parser Term
|
||||||
|
letExpr = Let <$ keyword "let" <*> ident <* keyword "=" <*> term <* keyword "in" <*> term
|
||||||
|
|
||||||
|
pattern : Parser Pattern
|
||||||
|
|
||||||
|
lamExpr : Parser Term
|
||||||
|
lamExpr = do
|
||||||
|
keyword "\\"
|
||||||
|
commit
|
||||||
|
name <- pattern
|
||||||
|
keyword "."
|
||||||
|
scope <- term
|
||||||
|
pure $ Lam name scope
|
||||||
|
|
||||||
|
caseAlt : Parser CaseAlt
|
||||||
|
|
||||||
|
caseExpr : Parser Term
|
||||||
|
caseExpr = do
|
||||||
|
_ <- keyword "case"
|
||||||
|
commit
|
||||||
|
sc <- term
|
||||||
|
keyword "of"
|
||||||
|
alts <- startBlock $ someSame $ caseAlt
|
||||||
|
pure $ Case sc alts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- TODO - get this up and running with a case expr to test it
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{-
|
||||||
|
so lets say we wanted to do the indent, the hard way.
|
||||||
|
|
||||||
|
what does this look like
|
||||||
|
|
||||||
|
We want to say:
|
||||||
|
kw 'let' $ indentedSome assignment
|
||||||
|
|
||||||
|
now assignment is going to be whatever, but we need to make sure the initial token can/must be at
|
||||||
|
current indentation level.
|
||||||
|
|
||||||
|
Ok idris indent is not Col, it's AnyIndent | AtPos Int | AfterPos Int
|
||||||
|
|
||||||
|
The col though is somehow sixty.
|
||||||
|
|
||||||
|
sixten is sameCol, dropAnchor, with an "environment"
|
||||||
|
|
||||||
|
|
||||||
|
sixty is Position is line and col and either line matches or col < tokenCol.
|
||||||
|
|
||||||
|
that's maybe doable..
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-}
|
||||||
|
|
||||||
139
src/Lib/Parser/Impl.idr
Normal file
139
src/Lib/Parser/Impl.idr
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
module Lib.Parser.Impl
|
||||||
|
|
||||||
|
import Lib.Token
|
||||||
|
|
||||||
|
public export
|
||||||
|
TokenList : Type
|
||||||
|
TokenList = List BTok
|
||||||
|
|
||||||
|
-- Error of a parse
|
||||||
|
public export
|
||||||
|
data Error = E String
|
||||||
|
%name Error err
|
||||||
|
|
||||||
|
-- Result of a parse
|
||||||
|
public export
|
||||||
|
data Result : Type -> Type where
|
||||||
|
OK : a -> (toks : TokenList) -> (com : Bool) -> Result a
|
||||||
|
Fail : Error -> (toks : TokenList) -> (com : Bool) -> Result a
|
||||||
|
|
||||||
|
export
|
||||||
|
Functor Result where
|
||||||
|
map f (OK a toks com ) = OK (f a) toks com
|
||||||
|
map _ (Fail err toks com) = Fail err toks com
|
||||||
|
|
||||||
|
-- So sixty just has a newtype function here now (probably for perf).
|
||||||
|
-- A record might be more ergonomic, but would require a record impl before
|
||||||
|
-- self hosting.
|
||||||
|
|
||||||
|
-- We keep the line and column for indents. The idea being that we check
|
||||||
|
-- either the col < tokCol or line == tokLine, enabling sameLevel.
|
||||||
|
|
||||||
|
-- dunno why I'm making that a pair..
|
||||||
|
export
|
||||||
|
data Parser a = P (TokenList -> Bool -> (lc : (Int,Int)) -> Result a)
|
||||||
|
|
||||||
|
runP : Parser a -> TokenList -> Bool -> (Int,Int) -> Result a
|
||||||
|
runP (P f) = f
|
||||||
|
|
||||||
|
fail : String -> Parser a
|
||||||
|
fail msg = P $ \toks,com,col => Fail (E msg) toks com
|
||||||
|
|
||||||
|
export
|
||||||
|
Functor Parser where
|
||||||
|
map f (P pa) = P $ \ toks, com, col => map f (pa toks com col)
|
||||||
|
|
||||||
|
export
|
||||||
|
Applicative Parser where
|
||||||
|
pure pa = P (\ toks, com, col => OK pa toks com)
|
||||||
|
P pab <*> P pa = P $ \toks,com,col =>
|
||||||
|
case pab toks com col of
|
||||||
|
Fail err toks com => Fail err toks com
|
||||||
|
OK f toks com =>
|
||||||
|
case pa toks com col of
|
||||||
|
(OK x toks com) => OK (f x) toks com
|
||||||
|
(Fail err toks com) => Fail err toks com
|
||||||
|
|
||||||
|
export
|
||||||
|
Alternative Parser where
|
||||||
|
empty = fail "empty"
|
||||||
|
(P pa) <|> (P pb) = P $ \toks,com,col =>
|
||||||
|
case pa toks com col of
|
||||||
|
f@(Fail _ _ com') => if com' then f else pb toks com col
|
||||||
|
t => t
|
||||||
|
|
||||||
|
export
|
||||||
|
Monad Parser where
|
||||||
|
P pa >>= pab = P $ \toks,com,col =>
|
||||||
|
case pa toks com col of
|
||||||
|
(OK a toks com) => runP (pab a) toks com col
|
||||||
|
(Fail err xs x) => ?rhs_1
|
||||||
|
|
||||||
|
|
||||||
|
-- do we need this?
|
||||||
|
pred : (BTok -> Bool) -> String -> Parser String
|
||||||
|
pred f msg = P $ \toks,com,col =>
|
||||||
|
case toks of
|
||||||
|
(t :: ts) => if f t then OK (value t) ts com else Fail (E msg) toks com
|
||||||
|
[] => Fail (E "eof") toks com
|
||||||
|
|
||||||
|
export
|
||||||
|
commit : Parser ()
|
||||||
|
commit = P $ \toks,com,col => OK () toks True
|
||||||
|
|
||||||
|
export
|
||||||
|
token : Kind -> Parser String
|
||||||
|
token k = pred (\t => t.val.kind == k) "Expected a \{show k}"
|
||||||
|
|
||||||
|
|
||||||
|
export
|
||||||
|
keyword : String -> Parser ()
|
||||||
|
keyword kw = ignore $ pred (\t => t.val.text == kw) "Expected \{kw}"
|
||||||
|
|
||||||
|
|
||||||
|
many : Parser a -> Parser (List a)
|
||||||
|
some : Parser a -> Parser (List a)
|
||||||
|
some p = (::) <$> p <*> many p
|
||||||
|
many p = some p <|> pure []
|
||||||
|
|
||||||
|
-- sixty let has some weird CPS stuff, but essentially:
|
||||||
|
|
||||||
|
-- case token_ of
|
||||||
|
-- Lexer.LLet -> PLet <$> blockOfMany let_ <* token Lexer.In <*> term
|
||||||
|
|
||||||
|
-- withIndentationBlock - sets the col
|
||||||
|
|
||||||
|
||| Start an indented block and run parser in it
|
||||||
|
export
|
||||||
|
startBlock : Parser a -> Parser a
|
||||||
|
startBlock (P p) = P $ \toks,com,(l,c) => case toks of
|
||||||
|
[] => p toks com (l,c)
|
||||||
|
(t :: ts) =>
|
||||||
|
let (tl,tc) = start t
|
||||||
|
in p toks com (tl,tc)
|
||||||
|
|
||||||
|
||| Assert that parser starts at our current column by
|
||||||
|
||| checking column and then updating line to match the current
|
||||||
|
export
|
||||||
|
sameLevel : Parser a -> Parser a
|
||||||
|
sameLevel (P p) = P $ \toks,com,(l,c) => case toks of
|
||||||
|
[] => p toks com (l,c)
|
||||||
|
(t :: ts) =>
|
||||||
|
let (tl,tc) = start t
|
||||||
|
in if tc == c then p toks com (tl, c)
|
||||||
|
else if c < tc then Fail (E "unexpected indent") toks com
|
||||||
|
else Fail (E "unexpected indent") toks com
|
||||||
|
|
||||||
|
export
|
||||||
|
someSame : Parser a -> Parser (List a)
|
||||||
|
someSame = some . sameLevel
|
||||||
|
|
||||||
|
||| requires a token to be indented?
|
||||||
|
indented : Parser a -> Parser a
|
||||||
|
indented (P p) = P $ \toks,com,(l,c) => case toks of
|
||||||
|
[] => p toks com (l,c)
|
||||||
|
(t::ts) =>
|
||||||
|
let (tl,tc) = start t
|
||||||
|
in if tc > c || tl == l then p toks com (l,c)
|
||||||
|
else Fail (E "unexpected outdent") toks com
|
||||||
|
|
||||||
23
src/Lib/Test.idr
Normal file
23
src/Lib/Test.idr
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import Text.Lexer
|
||||||
|
|
||||||
|
data Kind = Ignore | Ident | Oper | Number
|
||||||
|
|
||||||
|
ignore : WithBounds (Token Kind) -> Bool
|
||||||
|
ignore (MkBounded (Tok Ignore _) _ _) = True
|
||||||
|
ignore _ = False
|
||||||
|
|
||||||
|
tmap : TokenMap (Token Kind)
|
||||||
|
tmap = [
|
||||||
|
(alpha <+> many alphaNum, Tok Ident),
|
||||||
|
(some digit, Tok Number),
|
||||||
|
(some symbol, Tok Oper),
|
||||||
|
(spaces, Tok Ignore)
|
||||||
|
]
|
||||||
|
|
||||||
|
show : WithBounds (Token Kind) -> String
|
||||||
|
show t@(MkBounded (Tok _ v) _ _) = "\{show $ start t} \{show v}"
|
||||||
|
|
||||||
|
main : IO ()
|
||||||
|
main = do
|
||||||
|
let toks = filter (not . ignore) $ fst $ lex tmap "let x = 1\n y = 2\n in x + y"
|
||||||
|
traverse_ (putStrLn . show) toks
|
||||||
55
src/Lib/Tok2.idr
Normal file
55
src/Lib/Tok2.idr
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
module Lib.Tok2
|
||||||
|
|
||||||
|
import Text.Bounded
|
||||||
|
|
||||||
|
data Kind
|
||||||
|
= Ident
|
||||||
|
| Keyword
|
||||||
|
| Oper
|
||||||
|
| Number
|
||||||
|
| Symbol
|
||||||
|
| Arrow
|
||||||
|
| Ignore
|
||||||
|
| EOF
|
||||||
|
|
||||||
|
data Token = Tok Kind String
|
||||||
|
|
||||||
|
Show Kind where
|
||||||
|
show Ident = "Ident"
|
||||||
|
show Keyword = "Keyword"
|
||||||
|
show Oper = "Oper"
|
||||||
|
show Number = "Number"
|
||||||
|
show Symbol = "Symbol"
|
||||||
|
show Arrow = "Arrow"
|
||||||
|
show Ignore = "Ignore"
|
||||||
|
show EOF = "EOF"
|
||||||
|
|
||||||
|
Show Token where show (Tok k v) = "<\{show k}:\{show v}>"
|
||||||
|
|
||||||
|
keywords : List String
|
||||||
|
keywords = ["let", "in", "where", "case"]
|
||||||
|
|
||||||
|
specialOps : List String
|
||||||
|
specialOps = ["->", ":"]
|
||||||
|
|
||||||
|
checkKW : String -> Token
|
||||||
|
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
|
||||||
|
|
||||||
|
opkind : String -> Kind
|
||||||
|
opkind "->" = Arrow
|
||||||
|
opkind _ = Oper
|
||||||
|
|
||||||
|
isOpChar : Char -> Bool
|
||||||
|
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
|
||||||
|
|
||||||
|
nextToken : Int -> Int -> List Char -> Maybe (Token, (Int, Int, List Char))
|
||||||
|
nextToken row col xs = case xs of
|
||||||
|
[] => Nothing
|
||||||
|
(' ' :: cs) => nextToken row (col + 1) cs
|
||||||
|
(c :: cs) =>
|
||||||
|
if isDigit c then getTok (Tok Number) isDigit [c] cs
|
||||||
|
else if isOpChar c then getTok else ?Foo
|
||||||
|
where
|
||||||
|
getTok : (String -> Token) -> (Char -> Bool) -> List Char -> List Char -> Maybe (Token, (Int, Int, List Char))
|
||||||
|
|
||||||
|
|
||||||
55
src/Lib/Token.idr
Normal file
55
src/Lib/Token.idr
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
module Lib.Token
|
||||||
|
|
||||||
|
import public Text.Lexer
|
||||||
|
|
||||||
|
public export
|
||||||
|
data Kind
|
||||||
|
= Ident
|
||||||
|
| Keyword
|
||||||
|
| Oper
|
||||||
|
| Number
|
||||||
|
| Symbol
|
||||||
|
| Arrow
|
||||||
|
| Space
|
||||||
|
| LBrace
|
||||||
|
| Semi
|
||||||
|
| RBrace
|
||||||
|
|
||||||
|
export
|
||||||
|
Show Kind where
|
||||||
|
show Ident = "Ident"
|
||||||
|
show Keyword = "Keyword"
|
||||||
|
show Oper = "Oper"
|
||||||
|
show Number = "Number"
|
||||||
|
show Symbol = "Symbol"
|
||||||
|
show Arrow = "Arrow"
|
||||||
|
show Space = "Space"
|
||||||
|
show LBrace = "LBrace"
|
||||||
|
show Semi = "Semi"
|
||||||
|
show RBrace = "RBrace"
|
||||||
|
|
||||||
|
export
|
||||||
|
Eq Kind where
|
||||||
|
Ident == Ident = True
|
||||||
|
Keyword == Keyword = True
|
||||||
|
Oper == Oper = True
|
||||||
|
Number == Number = True
|
||||||
|
Symbol == Symbol = True
|
||||||
|
Arrow == Arrow = True
|
||||||
|
Space == Space = True
|
||||||
|
LBrace == LBrace = True
|
||||||
|
Semi == Semi = True
|
||||||
|
RBrace == RBrace = True
|
||||||
|
_ == _ = False
|
||||||
|
|
||||||
|
export
|
||||||
|
Show k => Show (Token k) where
|
||||||
|
show (Tok k v) = "<\{show k}:\{show v}>"
|
||||||
|
|
||||||
|
public export
|
||||||
|
BTok : Type
|
||||||
|
BTok = WithBounds (Token Kind)
|
||||||
|
|
||||||
|
export
|
||||||
|
value : BTok -> String
|
||||||
|
value (MkBounded (Tok _ s) _ _) = s
|
||||||
54
src/Lib/Tokenizer.idr
Normal file
54
src/Lib/Tokenizer.idr
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
module Lib.Tokenizer
|
||||||
|
|
||||||
|
import Text.Lexer
|
||||||
|
import Text.Lexer.Tokenizer
|
||||||
|
import Lib.Token
|
||||||
|
|
||||||
|
keywords : List String
|
||||||
|
keywords = ["let", "in", "where", "case", "of", "data"]
|
||||||
|
|
||||||
|
specialOps : List String
|
||||||
|
specialOps = ["->", ":"]
|
||||||
|
|
||||||
|
checkKW : String -> Token Kind
|
||||||
|
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
|
||||||
|
|
||||||
|
opkind : String -> Kind
|
||||||
|
opkind "->" = Arrow
|
||||||
|
opkind _ = Oper
|
||||||
|
|
||||||
|
isOpChar : Char -> Bool
|
||||||
|
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
|
||||||
|
|
||||||
|
opChar : Lexer
|
||||||
|
opChar = pred isOpChar
|
||||||
|
|
||||||
|
-- so Text.Lexer.Core.lex is broken
|
||||||
|
-- tmap : TokenMap (Token Kind)
|
||||||
|
-- tmap = [
|
||||||
|
-- (alpha <+> many alphaNum, checkKW),
|
||||||
|
-- (some digit, Tok Number),
|
||||||
|
-- (some opChar, \s => Tok (opkind s) s),
|
||||||
|
-- (lineComment (exact "--"), Tok Space),
|
||||||
|
-- (symbol, Tok Symbol),
|
||||||
|
-- (spaces, Tok Space)
|
||||||
|
-- ]
|
||||||
|
|
||||||
|
rawTokens : Tokenizer (Token Kind)
|
||||||
|
rawTokens
|
||||||
|
= match (alpha <+> many alphaNum) checkKW
|
||||||
|
<|> match (some digit) (Tok Number)
|
||||||
|
<|> match (some opChar) (\s => Tok (opkind s) s)
|
||||||
|
<|> match (lineComment (exact "--")) (Tok Space)
|
||||||
|
<|> match symbol (Tok Symbol)
|
||||||
|
<|> match spaces (Tok Space)
|
||||||
|
|
||||||
|
notSpace : WithBounds (Token Kind) -> Bool
|
||||||
|
notSpace (MkBounded (Tok Space _) _ _) = False
|
||||||
|
notSpace _ = True
|
||||||
|
|
||||||
|
export
|
||||||
|
tokenise : String -> List BTok
|
||||||
|
tokenise = filter notSpace . fst . lex rawTokens
|
||||||
|
|
||||||
|
|
||||||
78
src/Lib/Tokenizer2.idr
Normal file
78
src/Lib/Tokenizer2.idr
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
module Lib.Tokenizer2
|
||||||
|
|
||||||
|
-- Starting to write from-scratch tokenizer to flatten out the data a little.
|
||||||
|
|
||||||
|
data Position = Pos Int Int
|
||||||
|
|
||||||
|
inc : Position -> Position
|
||||||
|
inc (Pos l c) = Pos l (c + 1)
|
||||||
|
|
||||||
|
|
||||||
|
public export
|
||||||
|
data Kind
|
||||||
|
= Ident
|
||||||
|
| Keyword
|
||||||
|
| Oper
|
||||||
|
| Comment
|
||||||
|
| Number
|
||||||
|
| Symbol
|
||||||
|
| Arrow
|
||||||
|
-- virtual stuff deprecated?
|
||||||
|
| LBrace
|
||||||
|
| Semi
|
||||||
|
| RBrace
|
||||||
|
| EOF
|
||||||
|
|
||||||
|
data Token = Tok Int Int Kind String
|
||||||
|
|
||||||
|
data Lexer
|
||||||
|
= Match (Char -> Bool)
|
||||||
|
| Seq Lexer
|
||||||
|
|
||||||
|
keywords : List String
|
||||||
|
keywords = ["let", "in", "where", "case", "of", "data"]
|
||||||
|
|
||||||
|
specialOps : List String
|
||||||
|
specialOps = ["->", ":"]
|
||||||
|
|
||||||
|
-- checkKW : String -> Token Kind
|
||||||
|
-- checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
|
||||||
|
|
||||||
|
opkind : String -> Kind
|
||||||
|
opkind "->" = Arrow
|
||||||
|
opkind _ = Oper
|
||||||
|
|
||||||
|
isOpChar : Char -> Bool
|
||||||
|
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
|
||||||
|
|
||||||
|
-- opChar : Lexer
|
||||||
|
-- opChar = pred isOpChar
|
||||||
|
|
||||||
|
token : Int -> Int -> List Char -> (Token, List Char)
|
||||||
|
token l c cs = case cs of
|
||||||
|
[] => (Tok l c EOF "",[])
|
||||||
|
('-' :: '-' :: cs) => scan (/= '\n') ['-','-'] l (c+2) cs
|
||||||
|
_ => ?fixme
|
||||||
|
|
||||||
|
where
|
||||||
|
scan : (Char -> Bool) -> (List Char) -> Int -> Int -> List Char -> (Token,List Char)
|
||||||
|
|
||||||
|
|
||||||
|
-- match could be a Maybe thing and use the maybe Alt impl.
|
||||||
|
-- Not sure what shape I want here. Maybe a straight up Character Parser and drop tokenization.
|
||||||
|
|
||||||
|
-- rawTokens : Tokenizer (Token Kind)
|
||||||
|
-- rawTokens
|
||||||
|
-- = match (alpha <+> many alphaNum) checkKW
|
||||||
|
-- <|> match (some digit) (Tok Number)
|
||||||
|
-- <|> match (some opChar) (\s => Tok (opkind s) s)
|
||||||
|
-- <|> match (lineComment (exact "--")) (Tok Space)
|
||||||
|
-- <|> match symbol (Tok Symbol)
|
||||||
|
-- <|> match spaces (Tok Space)
|
||||||
|
|
||||||
|
|
||||||
|
-- export
|
||||||
|
-- tokenise : String -> List BTok
|
||||||
|
-- tokenise = filter notSpace . fst . lex rawTokens
|
||||||
|
|
||||||
|
|
||||||
28
src/Main.idr
Normal file
28
src/Main.idr
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
module Main
|
||||||
|
|
||||||
|
import Lib.Tokenizer
|
||||||
|
import Lib.Layout
|
||||||
|
import Lib.Token
|
||||||
|
|
||||||
|
src = "let x = 1\n y = 2\n in x + y"
|
||||||
|
|
||||||
|
check : String -> IO ()
|
||||||
|
check src = do
|
||||||
|
putStrLn "--"
|
||||||
|
printLn $ src
|
||||||
|
let toks = tokenise src
|
||||||
|
|
||||||
|
printLn $ toks
|
||||||
|
let toks2 = layout toks
|
||||||
|
|
||||||
|
printLn $ map value toks2
|
||||||
|
|
||||||
|
main : IO ()
|
||||||
|
main = do
|
||||||
|
-- this stuff is working with layout, should I bother with col.
|
||||||
|
-- downside is that it will be somewhat picky about the indentation of `in`
|
||||||
|
-- The sixty stuff looks promising. I might want my own tokenizer though.
|
||||||
|
check "let x = 1\n y = 2\n in x + y"
|
||||||
|
check "let x = 1 in x + 2"
|
||||||
|
check "case x of\n True => something\n False => let\n x = 1\n y = 2\n in x + y"
|
||||||
|
|
||||||
58
src/Syntax.idr
Normal file
58
src/Syntax.idr
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
module Syntax
|
||||||
|
|
||||||
|
-- Good enough start, lets parse
|
||||||
|
-- This is informed by pi-forall and others and is somewhat low level
|
||||||
|
|
||||||
|
Name = String
|
||||||
|
|
||||||
|
data Term : Type where
|
||||||
|
|
||||||
|
TyTerm = Term
|
||||||
|
|
||||||
|
data Literal = LString String | LInt Int | LBool Bool
|
||||||
|
data RigCount = Rig0 | RigW
|
||||||
|
data Plicity = Implicit | Explicit | Eq
|
||||||
|
|
||||||
|
public export
|
||||||
|
data Pattern
|
||||||
|
= PatVar Name
|
||||||
|
| PatCon Name (List (Pattern, RigCount))
|
||||||
|
| PatWild
|
||||||
|
| PatLit Literal
|
||||||
|
|
||||||
|
-- could be a pair, but I suspect stuff will be added?
|
||||||
|
public export
|
||||||
|
data CaseAlt = MkAlt Pattern Term
|
||||||
|
|
||||||
|
public export
|
||||||
|
data Term
|
||||||
|
= Var Name
|
||||||
|
| Ann Term TyTerm
|
||||||
|
| Lit Literal
|
||||||
|
| Let Name Term Term
|
||||||
|
| Pi Name Plicity Term Term
|
||||||
|
| App Term Term
|
||||||
|
| Lam Pattern Term
|
||||||
|
| Case Term (List CaseAlt)
|
||||||
|
| Wildcard
|
||||||
|
| ParseError String
|
||||||
|
|
||||||
|
-- derive some stuff - I'd like json, eq, show, ...
|
||||||
|
|
||||||
|
data Decl : Type where
|
||||||
|
|
||||||
|
Telescope: Type
|
||||||
|
Telescope = List Decl -- pi-forall, always typeSig?
|
||||||
|
|
||||||
|
data ConstrDef = MkCDef Name Telescope
|
||||||
|
|
||||||
|
data Decl
|
||||||
|
= TypeSig Name RigCount TyTerm
|
||||||
|
| Def Name Term
|
||||||
|
| Data Name Telescope (List ConstrDef)
|
||||||
|
|
||||||
|
record Module where
|
||||||
|
constructor MkModule
|
||||||
|
name : Name
|
||||||
|
imports : List Name
|
||||||
|
decls : List Decl
|
||||||
Reference in New Issue
Block a user