2022-09-08 22:45:07 -07:00
commit 39deff1465
13 changed files with 767 additions and 0 deletions

src/Lib/Grammar.idr Normal file

@@ -0,0 +1,45 @@
module Lib.Grammar
-- Derived from Parser/Core.idr - revisit later when we want to be stack safe
-- For simplicity (for now), it drops the consumed flag from the type, which I think was
-- there for totality checking. May want it back someday.
data Grammar : (state : Type) -> (tok : Type) -> Type -> Type where
EOF : Grammar state tok ()
Empty : a -> Grammar state tok a
Fail : String -> Grammar state tok a
Commit : Grammar state tok ()
Alt : Grammar state tok ty -> Lazy (Grammar state tok ty) -> Grammar state tok ty
Seq : Grammar state tok a -> (a -> Grammar state tok b) -> Grammar state tok b
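For reference, the consumed flag dropped above is a Bool index on the upstream type, which is what lets the totality checker see that a consuming sequence makes progress. Restoring it would look roughly like the sketch below (from memory, not the exact Parser/Core.idr definition; upstream also splits Seq into eating and non-eating variants, and the primes are only to keep the sketch apart from the real declaration above):

data Grammar' : (state : Type) -> (tok : Type) -> (consumes : Bool) -> Type -> Type where
  EOF'    : Grammar' state tok False ()
  Empty'  : a -> Grammar' state tok False a
  Fail'   : String -> Grammar' state tok c a
  Commit' : Grammar' state tok False ()
  Alt'    : Grammar' state tok c1 ty -> Lazy (Grammar' state tok c2 ty) ->
            Grammar' state tok (c1 && c2) ty
  Seq'    : Grammar' state tok c1 a -> (a -> Grammar' state tok c2 b) ->
            Grammar' state tok (c1 || c2) b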
Functor (Grammar state tok) where
map f (Fail str) = Fail str
map f (Alt x y) = Alt (map f x) (map f y)
map f (Seq x g) = Seq x (map f . g)
map f (Empty v) = Empty (f v)
-- Empty / EOF
map f p = Seq p (Empty . f)
Applicative (Grammar state tok) where
pure a = Empty a
x <*> y = Seq x (\f => map f y)
Alternative (Grammar state tok) where
empty = Fail "empty"
(<|>) = Alt
Monad (Grammar state tok) where
(>>=) = Seq
ParseError : Type
data ParseResult : Type -> Type -> Type -> Type where
Failure : (commit : Bool) -> List ParseError -> ParseResult state tok ty
doParse : Semigroup state =>
state ->
(commit : Bool) ->
Grammar state tok ty ->
List tok ->
ParseResult state tok ty
doParse = ?todo

src/Lib/Layout.idr Normal file

@@ -0,0 +1,48 @@
module Lib.Layout
import Lib.Token
import Text.Lexer
-- Dunno if I'll do layout or pass col, but here it is:
intro : List String
intro = [ "where", "let", "of" ]
Position : Type
Position = (Int,Int)
isIntro : BTok -> Bool
isIntro t = elem (value t) intro
export
layout : List BTok -> List BTok
layout = go [] []
where
lbrace = irrelevantBounds (Tok LBrace "{")
semi = irrelevantBounds (Tok Semi ";")
rbrace = irrelevantBounds (Tok RBrace "}")
-- go' does start of block
go' : List BTok -> List Position -> List BTok -> List BTok
-- go emits semicolons and closes blocks; an "in" closes a let block when it sits on the block's starting line, otherwise it (like any other token) must be outdented to close the block
go : List BTok -> List Position -> List BTok -> List BTok
go acc [] [] = reverse acc
-- close any blocks still open at EOF
go acc (_ :: lvls) [] = go (rbrace :: acc) lvls []
go acc [] ts = go' acc [] ts
go acc lvls@(lvl::rest) toks@(t :: ts) =
let (sr,sc) = start t in
if snd lvl > sc then go (rbrace :: acc) rest toks
else if snd lvl == sc then go' (semi :: acc) lvls toks
else if value t == "in" && sr == fst lvl then go' (rbrace :: acc) rest toks
else go' acc lvls toks
-- handle start of block
go' acc lvls [] = reverse acc
go' acc lvls (t::ts) = case isIntro t of
False => go (t::acc) lvls ts
True => case ts of
t'::ts' => go (t' :: lbrace :: t :: acc) (start t' :: lvls) ts'
[] => go (rbrace :: lbrace :: t :: acc) lvls ts
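As a worked example of what the above is aiming for (alignment matters: y sits directly under x and in is outdented; tokenise is from Lib.Tokenizer), the token texts after layout should read:

-- map value (layout (tokenise "let x = 1\n    y = 2\nin x + y"))
--   ==> ["let", "{", "x", "=", "1", ";", "y", "=", "2", "}", "in", "x", "+", "y"]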

src/Lib/Lexer2.idr Normal file

@@ -0,0 +1,56 @@
module Lib.Lexer2
data UToken
= Ident String
| Let
| In
| Data
| Where
| Forall
| Case
| Of
| Underscore
| Operator String
| Equals
| Dot
| Colon
| Pipe
| RightArrow
| FatArrow
| Number Int
| Lambda
| LeftParen
| RightParen
| LeftBrace
| RightBrace
| Error
-- we can skip this if we keep the text...
Show UToken where
show = \case
Ident name => name
Let => "let"
In => "in"
Data => "data"
Where => "where"
Forall => "forall"
Case => "case"
Of => "of"
Underscore => "_"
Operator name => name
Equals => "="
Dot => "."
Colon => ":"
Pipe => "|"
RightArrow => "->"
FatArrow => "=>"
Number i => show i
Lambda => "\\"
LeftParen => "("
RightParen => ")"
LeftBrace => "{"
RightBrace => "}"
Error => "<error>"

src/Lib/Parser.idr Normal file

@@ -0,0 +1,81 @@
module Lib.Parser
import Lib.Token
import Lib.Parser.Impl
import Syntax
-- There is the whole core vs surface question here.
-- It might be best to do core first. Technically we don't
-- _need_ a parser, but one would be useful for testing.
-- Look to pi-forall / ezoo, but I think we start with a
-- TTImp-level grammar, then add a more sugared layer above
-- (so holes and all that).
-- After the parser runs (see below), take a break and finish the pi-forall
-- exercises. There is some fill-in-the-parser material there that may hint
-- at where this is going.
ident : Parser String
ident = token Ident
term : Parser Term
letExpr : Parser Term
letExpr = Let <$ keyword "let" <*> ident <* keyword "=" <*> term <* keyword "in" <*> term
pattern : Parser Pattern
lamExpr : Parser Term
lamExpr = do
keyword "\\"
commit
name <- pattern
keyword "."
scope <- term
pure $ Lam name scope
caseAlt : Parser CaseAlt
caseExpr : Parser Term
caseExpr = do
_ <- keyword "case"
commit
sc <- term
keyword "of"
alts <- startBlock $ someSame $ caseAlt
pure $ Case sc alts
-- TODO - get this up and running with a case expr to test it
{-
So let's say we wanted to do the indent the hard way.
What does this look like? We want to say:
kw 'let' $ indentedSome assignment
Now assignment is going to be whatever, but we need to make sure its initial token
can/must be at the current indentation level.
Note Idris's indent state is not a plain Col; it's AnyIndent | AtPos Int | AfterPos Int.
The plain-column idea is more like sixty's.
sixten is sameCol, dropAnchor, with an "environment".
sixty's Position is a line and a col, and a token is accepted when either the line
matches or col < tokenCol. That's maybe doable.. (a rough sketch of that check follows)
-}
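A minimal sketch of that sixty-style acceptance check as a standalone predicate (the name and argument layout here are mine, not sixty's):

-- anchor is the (line, col) recorded when a block is opened;
-- (tokLine, tokCol) is where the next token starts.
sixtyOK : (anchor : (Int, Int)) -> (tokLine : Int) -> (tokCol : Int) -> Bool
sixtyOK (line, col) tokLine tokCol = tokLine == line || col < tokCol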

src/Lib/Parser/Impl.idr Normal file

@@ -0,0 +1,139 @@
module Lib.Parser.Impl
import Lib.Token
public export
TokenList : Type
TokenList = List BTok
-- Error of a parse
public export
data Error = E String
%name Error err
-- Result of a parse
public export
data Result : Type -> Type where
OK : a -> (toks : TokenList) -> (com : Bool) -> Result a
Fail : Error -> (toks : TokenList) -> (com : Bool) -> Result a
export
Functor Result where
map f (OK a toks com ) = OK (f a) toks com
map _ (Fail err toks com) = Fail err toks com
-- So sixty just has a newtype function here now (probably for perf).
-- A record might be more ergonomic, but would require a record impl before
-- self hosting.
-- We keep the line and column for indents. The idea being that we check
-- either the col < tokCol or line == tokLine, enabling sameLevel.
-- dunno why I'm making that a pair..
export
data Parser a = P (TokenList -> Bool -> (lc : (Int,Int)) -> Result a)
runP : Parser a -> TokenList -> Bool -> (Int,Int) -> Result a
runP (P f) = f
fail : String -> Parser a
fail msg = P $ \toks,com,col => Fail (E msg) toks com
export
Functor Parser where
map f (P pa) = P $ \ toks, com, col => map f (pa toks com col)
export
Applicative Parser where
pure pa = P (\ toks, com, col => OK pa toks com)
P pab <*> P pa = P $ \toks,com,col =>
case pab toks com col of
Fail err toks com => Fail err toks com
OK f toks com =>
case pa toks com col of
(OK x toks com) => OK (f x) toks com
(Fail err toks com) => Fail err toks com
export
Alternative Parser where
empty = fail "empty"
(P pa) <|> (P pb) = P $ \toks,com,col =>
case pa toks com col of
f@(Fail _ _ com') => if com' then f else pb toks com col
t => t
export
Monad Parser where
P pa >>= pab = P $ \toks,com,col =>
case pa toks com col of
(OK a toks com) => runP (pab a) toks com col
(Fail err xs x) => Fail err xs x
-- do we need this?
pred : (BTok -> Bool) -> String -> Parser String
pred f msg = P $ \toks,com,col =>
case toks of
(t :: ts) => if f t then OK (value t) ts com else Fail (E msg) toks com
[] => Fail (E "eof") toks com
export
commit : Parser ()
commit = P $ \toks,com,col => OK () toks True
export
token : Kind -> Parser String
token k = pred (\t => t.val.kind == k) "Expected a \{show k}"
export
keyword : String -> Parser ()
keyword kw = ignore $ pred (\t => t.val.text == kw) "Expected \{kw}"
many : Parser a -> Parser (List a)
some : Parser a -> Parser (List a)
some p = (::) <$> p <*> many p
many p = some p <|> pure []
-- sixty let has some weird CPS stuff, but essentially:
-- case token_ of
-- Lexer.LLet -> PLet <$> blockOfMany let_ <* token Lexer.In <*> term
-- withIndentationBlock - sets the col
||| Start an indented block and run parser in it
export
startBlock : Parser a -> Parser a
startBlock (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t :: ts) =>
let (tl,tc) = start t
in p toks com (tl,tc)
||| Assert that the next token starts at our current column, then update the
||| stored line to that token's line so the rest of the construct can continue on it
export
sameLevel : Parser a -> Parser a
sameLevel (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t :: ts) =>
let (tl,tc) = start t
in if tc == c then p toks com (tl, c)
else if c < tc then Fail (E "unexpected indent") toks com
else Fail (E "unexpected outdent") toks com
export
someSame : Parser a -> Parser (List a)
someSame = some . sameLevel
||| Require the next token to be indented past the current column (or to sit on the current line)
indented : Parser a -> Parser a
indented (P p) = P $ \toks,com,(l,c) => case toks of
[] => p toks com (l,c)
(t::ts) =>
let (tl,tc) = start t
in if tc > c || tl == l then p toks com (l,c)
else Fail (E "unexpected outdent") toks com
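Putting the combinators in this file together, the sixty-style let from the comment above would presumably come out like this commented sketch (letBlock, assign, term, Name and Term are hypothetical here, only to show the intended shape):

-- letBlock : Parser (List (Name, Term), Term)
-- letBlock = do
--   keyword "let"
--   commit
--   binds <- startBlock (someSame assign)
--   keyword "in"
--   body <- term
--   pure (binds, body)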

src/Lib/Test.idr Normal file

@@ -0,0 +1,23 @@
import Text.Lexer
data Kind = Ignore | Ident | Oper | Number
ignore : WithBounds (Token Kind) -> Bool
ignore (MkBounded (Tok Ignore _) _ _) = True
ignore _ = False
tmap : TokenMap (Token Kind)
tmap = [
(alpha <+> many alphaNum, Tok Ident),
(some digit, Tok Number),
(some symbol, Tok Oper),
(spaces, Tok Ignore)
]
show : WithBounds (Token Kind) -> String
show t@(MkBounded (Tok _ v) _ _) = "\{show $ start t} \{show v}"
main : IO ()
main = do
let toks = filter (not . ignore) $ fst $ lex tmap "let x = 1\n y = 2\n in x + y"
traverse_ (putStrLn . show) toks
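For orientation, the first few lines this prints should look roughly like the following (a sketch that assumes Text.Lexer reports 0-based lines and columns):

-- (0, 0) "let"
-- (0, 4) "x"
-- (0, 6) "="
-- (0, 8) "1"
-- (1, 1) "y"
-- ...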

src/Lib/Tok2.idr Normal file

@@ -0,0 +1,55 @@
module Lib.Tok2
import Text.Bounded
data Kind
= Ident
| Keyword
| Oper
| Number
| Symbol
| Arrow
| Ignore
| EOF
data Token = Tok Kind String
Show Kind where
show Ident = "Ident"
show Keyword = "Keyword"
show Oper = "Oper"
show Number = "Number"
show Symbol = "Symbol"
show Arrow = "Arrow"
show Ignore = "Ignore"
show EOF = "EOF"
Show Token where show (Tok k v) = "<\{show k}:\{show v}>"
keywords : List String
keywords = ["let", "in", "where", "case"]
specialOps : List String
specialOps = ["->", ":"]
checkKW : String -> Token
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
nextToken : Int -> Int -> List Char -> Maybe (Token, (Int, Int, List Char))
nextToken row col xs = case xs of
[] => Nothing
(' ' :: cs) => nextToken row (col + 1) cs
(c :: cs) =>
if isDigit c then getTok (Tok Number) isDigit [c] cs
else if isOpChar c then getTok (\s => Tok (opkind s) s) isOpChar [c] cs
else ?Foo
where
getTok : (String -> Token) -> (Char -> Bool) -> List Char -> List Char -> Maybe (Token, (Int, Int, List Char))

src/Lib/Token.idr Normal file

@@ -0,0 +1,55 @@
module Lib.Token
import public Text.Lexer
public export
data Kind
= Ident
| Keyword
| Oper
| Number
| Symbol
| Arrow
| Space
| LBrace
| Semi
| RBrace
export
Show Kind where
show Ident = "Ident"
show Keyword = "Keyword"
show Oper = "Oper"
show Number = "Number"
show Symbol = "Symbol"
show Arrow = "Arrow"
show Space = "Space"
show LBrace = "LBrace"
show Semi = "Semi"
show RBrace = "RBrace"
export
Eq Kind where
Ident == Ident = True
Keyword == Keyword = True
Oper == Oper = True
Number == Number = True
Symbol == Symbol = True
Arrow == Arrow = True
Space == Space = True
LBrace == LBrace = True
Semi == Semi = True
RBrace == RBrace = True
_ == _ = False
export
Show k => Show (Token k) where
show (Tok k v) = "<\{show k}:\{show v}>"
public export
BTok : Type
BTok = WithBounds (Token Kind)
export
value : BTok -> String
value (MkBounded (Tok _ s) _ _) = s
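value just projects the text back out of a bounded token, e.g. (irrelevantBounds is from Text.Bounded):

-- value (irrelevantBounds (Tok Keyword "let")) == "let"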

src/Lib/Tokenizer.idr Normal file

@@ -0,0 +1,54 @@
module Lib.Tokenizer
import Text.Lexer
import Text.Lexer.Tokenizer
import Lib.Token
keywords : List String
keywords = ["let", "in", "where", "case", "of", "data"]
specialOps : List String
specialOps = ["->", ":"]
checkKW : String -> Token Kind
checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
opChar : Lexer
opChar = pred isOpChar
-- Text.Lexer.Core.lex was broken for this (hence the unused TokenMap below); using Text.Lexer.Tokenizer instead
-- tmap : TokenMap (Token Kind)
-- tmap = [
-- (alpha <+> many alphaNum, checkKW),
-- (some digit, Tok Number),
-- (some opChar, \s => Tok (opkind s) s),
-- (lineComment (exact "--"), Tok Space),
-- (symbol, Tok Symbol),
-- (spaces, Tok Space)
-- ]
rawTokens : Tokenizer (Token Kind)
rawTokens
= match (alpha <+> many alphaNum) checkKW
<|> match (some digit) (Tok Number)
<|> match (some opChar) (\s => Tok (opkind s) s)
<|> match (lineComment (exact "--")) (Tok Space)
<|> match symbol (Tok Symbol)
<|> match spaces (Tok Space)
notSpace : WithBounds (Token Kind) -> Bool
notSpace (MkBounded (Tok Space _) _ _) = False
notSpace _ = True
export
tokenise : String -> List BTok
tokenise = filter notSpace . fst . lex rawTokens
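A rough illustration of what tokenise yields (token texts only; the kinds come out as Keyword, Ident, Oper, Number, and Space tokens are filtered):

-- map value (tokenise "let x = 1") ==> ["let", "x", "=", "1"]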

src/Lib/Tokenizer2.idr Normal file

@@ -0,0 +1,78 @@
module Lib.Tokenizer2
-- Starting to write a from-scratch tokenizer to flatten out the data a little.
data Position = Pos Int Int
inc : Position -> Position
inc (Pos l c) = Pos l (c + 1)
public export
data Kind
= Ident
| Keyword
| Oper
| Comment
| Number
| Symbol
| Arrow
-- virtual stuff deprecated?
| LBrace
| Semi
| RBrace
| EOF
data Token = Tok Int Int Kind String
data Lexer
= Match (Char -> Bool)
| Seq Lexer
keywords : List String
keywords = ["let", "in", "where", "case", "of", "data"]
specialOps : List String
specialOps = ["->", ":"]
-- checkKW : String -> Token Kind
-- checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
-- opChar : Lexer
-- opChar = pred isOpChar
token : Int -> Int -> List Char -> (Token, List Char)
token l c cs = case cs of
[] => (Tok l c EOF "",[])
('-' :: '-' :: cs) => scan (/= '\n') ['-','-'] l (c+2) cs
_ => ?fixme
where
scan : (Char -> Bool) -> (List Char) -> Int -> Int -> List Char -> (Token,List Char)
-- match could be a Maybe thing and use the maybe Alt impl.
-- Not sure what shape I want here. Maybe a straight up Character Parser and drop tokenization.
-- rawTokens : Tokenizer (Token Kind)
-- rawTokens
-- = match (alpha <+> many alphaNum) checkKW
-- <|> match (some digit) (Tok Number)
-- <|> match (some opChar) (\s => Tok (opkind s) s)
-- <|> match (lineComment (exact "--")) (Tok Space)
-- <|> match symbol (Tok Symbol)
-- <|> match spaces (Tok Space)
-- export
-- tokenise : String -> List BTok
-- tokenise = filter notSpace . fst . lex rawTokens
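One possible shape for the scan helper declared above, kept as a commented sketch since the design isn't settled: it assumes every scan produces a Comment token and stores the position where the scan stops, both of which are guesses:

-- scan keep acc line col (x :: xs) =
--   if keep x then scan keep (acc ++ [x]) line (col + 1) xs
--             else (Tok line col Comment (pack acc), x :: xs)
-- scan keep acc line col [] = (Tok line col Comment (pack acc), [])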

src/Main.idr Normal file

@@ -0,0 +1,28 @@
module Main
import Lib.Tokenizer
import Lib.Layout
import Lib.Token
src = "let x = 1\n y = 2\n in x + y"
check : String -> IO ()
check src = do
putStrLn "--"
printLn $ src
let toks = tokenise src
printLn $ toks
let toks2 = layout toks
printLn $ map value toks2
main : IO ()
main = do
-- This stuff is working with layout; should I bother with col?
-- The downside is that layout is somewhat picky about the indentation of `in`.
-- The sixty approach looks promising. I might want my own tokenizer though.
check "let x = 1\n y = 2\n in x + y"
check "let x = 1 in x + 2"
check "case x of\n True => something\n False => let\n x = 1\n y = 2\n in x + y"

src/Syntax.idr Normal file

@@ -0,0 +1,58 @@
module Syntax
-- Good enough start, let's parse
-- This is informed by pi-forall and others and is somewhat low level
Name : Type
Name = String
data Term : Type -- forward declaration; the full definition is below

TyTerm : Type
TyTerm = Term
data Literal = LString String | LInt Int | LBool Bool
data RigCount = Rig0 | RigW
data Plicity = Implicit | Explicit | Eq
public export
data Pattern
= PatVar Name
| PatCon Name (List (Pattern, RigCount))
| PatWild
| PatLit Literal
-- could be a pair, but I suspect stuff will be added?
public export
data CaseAlt = MkAlt Pattern Term
public export
data Term
= Var Name
| Ann Term TyTerm
| Lit Literal
| Let Name Term Term
| Pi Name Plicity Term Term
| App Term Term
| Lam Pattern Term
| Case Term (List CaseAlt)
| Wildcard
| ParseError String
-- derive some stuff - I'd like json, eq, show, ...
data Decl : Type -- forward declaration; the full definition is below

Telescope : Type
Telescope = List Decl -- pi-forall, always typeSig?
data ConstrDef = MkCDef Name Telescope
data Decl
= TypeSig Name RigCount TyTerm
| Def Name Term
| Data Name Telescope (List ConstrDef)
record Module where
constructor MkModule
name : Name
imports : List Name
decls : List Decl
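As a quick sanity check of the shape, the surface term let x = 1 in x + y would come out roughly as below (exampleTerm is illustrative only; it assumes application is left-nested and that + is just a Var):

exampleTerm : Term
exampleTerm = Let "x" (Lit (LInt 1)) (App (App (Var "+") (Var "x")) (Var "y"))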