chkpt

2022-09-08 22:45:07 -07:00
commit 39deff1465
13 changed files with 767 additions and 0 deletions
--- a/newt.ipkg
+++ b/newt.ipkg
@@ -0,0 +1,47 @@
 package newt
 version = 0.1.0
 authors = "Steve Dunham"
 -- maintainers =
 -- license =
 -- brief =
 -- readme =
 -- homepage =
 -- sourceloc =
 -- bugtracker =
 -- the Idris2 version required (e.g. langversion >= 0.5.1)
 -- langversion
 -- packages to add to search path
 depends = contrib
 -- modules to install
 -- modules =
 -- main file (i.e. file to load at REPL)
 main = Main
 -- name of executable
 executable = "newt"
 -- opts =
 sourcedir = "src"
 -- builddir =
 -- outputdir =
 -- script to run before building
 -- prebuild =
 -- script to run after building
 -- postbuild =
 -- script to run after building, before installing
 -- preinstall =
 -- script to run after installing
 -- postinstall =
 -- script to run before cleaning
 -- preclean =
 -- script to run after cleaning
 -- postclean =
--- a/src/Lib/Grammar.idr
+++ b/src/Lib/Grammar.idr
@@ -0,0 +1,45 @@
 module Lib.Grammar
 -- Derived from Parser/Core.idr - revisit later when we want to be stack safe
 -- For simplicity (for now), it drops consumed flag from the type, which I think was
 -- there for totality checking.  May want it back someday.
 data Grammar : (state : Type) -> (tok : Type) -> Type -> Type where
  EOF : Grammar state tok ()
  Empty : a -> Grammar state tok a
  Fail : String -> Grammar state tok a
  Commit : Grammar state tok ()
  Alt : Grammar state tok ty -> Lazy (Grammar state tok ty) -> Grammar state tok ty
  Seq : Grammar state tok a -> (a -> Grammar state tok b) -> Grammar state tok b
 Functor (Grammar state tok) where
  map f (Fail str) = Fail str
  map f (Alt x y) = Alt (map f x) (map f y)
  map f (Seq x g) = ?todo_4
  map f (Empty v) = Empty (f v)
  -- Empty / EOF
  map f p = Seq p (Empty . f)
 Applicative (Grammar state tok) where
  pure a = Empty a
  x <*> y = ?hole3
 Alternative (Grammar state tok) where 
  empty = ?hole2
  (<|>) = Alt
 Monad (Grammar state tok) where
  (>>=) = Seq
 ParseError : Type
 data ParseResult : Type -> Type -> Type -> Type where
  Failure : (commit : Bool) -> List ParseError -> ParseResult state tok ty
 doParse : Semigroup state =>
          state ->
          (commit : Bool) ->
          Grammar state tok ty ->
          List tok ->
          ParseResult state tok ty
 doParse = ?todo
--- a/src/Lib/Layout.idr
+++ b/src/Lib/Layout.idr
@@ -0,0 +1,48 @@
 module Lib.Layout
 import Lib.Token
 import Text.Lexer
 -- Dunno if I'll do layout or pass col, but here it is:
 intro : List String
 intro = [ "where", "let", "of" ]
 Position : Type
 Position = (Int,Int)
 isIntro : BTok -> Bool
 isIntro t = elem (value t) intro
 export
 layout : List BTok -> List BTok
 layout = go [] []
  where
    lbrace = irrelevantBounds (Tok LBrace "{")
    semi = irrelevantBounds (Tok Semi ";")
    rbrace = irrelevantBounds (Tok RBrace "}")
    -- go' does start of block
    go' : List BTok -> List Position -> List BTok -> List BTok
    -- go does semi and end of block, we require "in" to be on separate, outdented line if not the same line as let
    go : List BTok -> List Position -> List BTok -> List BTok
    go acc lvls [] = reverse acc
    go acc [] ts = go' acc [] ts
    go acc lvls@(lvl::rest) toks@(t :: ts) =
      let (sr,sc) = start t in
      if      snd lvl >  sc then go (rbrace :: acc) rest toks
      else if snd lvl == sc then go' (semi :: acc) lvls toks
      else if value t == "in" && sr == fst lvl then go' (rbrace :: acc) rest toks
      else go' acc lvls toks
    -- handle start of block
    go' acc lvls [] = reverse acc
    go' acc lvls (t::ts) = case isIntro t of
      False => go (t::acc) lvls ts
      True => case ts of
          t'::ts' => go (t' :: lbrace :: t :: acc) (start t' :: lvls) ts'
          []      => go (rbrace :: lbrace :: t :: acc) lvls ts
--- a/src/Lib/Lexer2.idr
+++ b/src/Lib/Lexer2.idr
@@ -0,0 +1,56 @@
 module Lib.Lexer2
 data UToken
  = Ident String
  | Let
  | In
  | Data
  | Where
  | Forall
  | Case
  | Of
  | Underscore
  | Operator String
  | Equals
  | Dot
  | Colon
  | Pipe
  | RightArrow
  | FatArrow
  | Number Int
  | Lambda
  | LeftParen
  | RightParen
  | LeftBrace
  | RightBrace
  | Error
 -- we can skip this if we keep the text...
 Show UToken where
  show = \case
    Ident name => name
    Let => "let"
    In => "in"
    Data => "data"
    Where => "where"
    Forall => "forall"
    Case => "case"
    Of => "of"
    Underscore => "_"
    Operator name => name
    Equals => ?rhs_10
    Dot => ?rhs_11
    Colon => ?rhs_12
    Pipe => ?rhs_13
    RightArrow => ?rhs_14
    FatArrow => ?rhs_15
    Number i => show i
    Lambda => ?rhs_17
    LeftParen => ?rhs_18
    RightParen => ?rhs_19
    LeftBrace => ?rhs_20
    RightBrace => ?rhs_21
    Error => ?rhs_22
--- a/src/Lib/Parser.idr
+++ b/src/Lib/Parser.idr
@@ -0,0 +1,81 @@
 module Lib.Parser
 import Lib.Token
 import Lib.Parser.Impl
 import Syntax
 -- There is the whole core vs surface thing here.
 -- might be best to do core first/ Technically don't
 -- _need_ a parser, but would be useful for testing.
 -- look to pi-forall / ezoo, but I think we start with a
 -- TTImpl level grammar, then add a more sugared layer above
 -- so holes and all that
 -- After the parser runs, see below, take a break and finish pi-forall
 -- exercises.  There is some fill in the parser stuff that may show 
 -- the future.
 ident = token Ident
 term : Parser Term
 letExpr : Parser Term
 letExpr = Let <$ keyword "let" <*> ident <* keyword "=" <*> term <* keyword "in" <*> term
 pattern : Parser Pattern
 lamExpr : Parser Term
 lamExpr = do
  keyword "\\"
  commit
  name <- pattern
  keyword "."
  scope <- term
  pure $ Lam name scope
 caseAlt : Parser CaseAlt
 caseExpr : Parser Term
 caseExpr = do
  _ <- keyword "case"
  commit
  sc <- term
  keyword "of"
  alts <- startBlock $ someSame $ caseAlt
  pure $ Case sc alts
 -- TODO - get this up and running with a case expr to test it
 {-
 so lets say we wanted to do the indent, the hard way. 
 what does this look like
 We want to say:
   kw 'let' $  indentedSome assignment
 now assignment is going to be whatever, but we need to make sure the initial token can/must be at
 current indentation level.
 Ok idris indent is not Col, it's AnyIndent | AtPos Int | AfterPos Int
 The col though is somehow sixty.
 sixten is sameCol, dropAnchor, with an "environment"
 sixty is Position is line and col and either line matches or col < tokenCol.
 that's maybe doable..
 -}
--- a/src/Lib/Parser/Impl.idr
+++ b/src/Lib/Parser/Impl.idr
@@ -0,0 +1,139 @@
 module Lib.Parser.Impl
 import Lib.Token
 public export
 TokenList : Type
 TokenList = List BTok
 -- Error of a parse
 public export
 data Error = E String
 %name Error err
 -- Result of a parse
 public export
 data Result : Type -> Type where
  OK : a -> (toks : TokenList) -> (com : Bool) -> Result a
  Fail : Error -> (toks : TokenList) -> (com : Bool)  -> Result a  
 export
 Functor Result where
  map f (OK a toks com ) = OK (f a) toks com
  map _ (Fail err toks com) = Fail err toks com
 -- So sixty just has a newtype function here now (probably for perf).
 -- A record might be more ergonomic, but would require a record impl before
 -- self hosting.
 -- We keep the line and column for indents. The idea being that we check
 -- either the col < tokCol or line == tokLine, enabling sameLevel.
 -- dunno why I'm making that a pair..
 export
 data Parser a = P (TokenList -> Bool -> (lc : (Int,Int)) -> Result a)
 runP : Parser a -> TokenList -> Bool -> (Int,Int) -> Result a
 runP (P f) = f
 fail : String -> Parser a
 fail msg = P $ \toks,com,col => Fail (E msg) toks com
 export
 Functor Parser where
  map f (P pa) = P $ \ toks, com, col => map f (pa toks com col)
 export
 Applicative Parser where 
  pure pa = P (\ toks, com, col => OK pa toks com)
  P pab <*> P pa = P $ \toks,com,col =>
    case pab toks com col of
      Fail err toks com => Fail err toks com
      OK f toks com => 
        case pa toks com col of 
          (OK x toks com) => OK (f x) toks com
          (Fail err toks com) => Fail err toks com
 export
 Alternative Parser where
  empty = fail "empty"
  (P pa) <|> (P pb) = P $ \toks,com,col =>
    case pa toks com col of
      f@(Fail _ _ com') => if com' then f else pb toks com col
      t => t
 export
 Monad Parser where
  P pa >>= pab = P $ \toks,com,col =>
    case pa toks com col of
      (OK a toks com) => runP (pab a) toks com col
      (Fail err xs x) => ?rhs_1
 -- do we need this?
 pred : (BTok -> Bool) -> String -> Parser String
 pred f msg = P $ \toks,com,col =>
  case toks of
    (t :: ts) => if f t then OK (value t) ts com else Fail (E msg) toks com
    [] => Fail (E "eof") toks com
 export
 commit : Parser ()
 commit = P $ \toks,com,col => OK () toks True
 export
 token : Kind -> Parser String
 token k = pred (\t => t.val.kind == k) "Expected a \{show k}"
 export
 keyword : String -> Parser ()
 keyword kw = ignore $ pred (\t => t.val.text == kw) "Expected \{kw}"
 many : Parser a -> Parser (List a)
 some : Parser a -> Parser (List a)
 some p = (::) <$> p <*> many p
 many p = some p <|> pure []
 -- sixty let has some weird CPS stuff, but essentially:
 -- case token_ of
 --   Lexer.LLet -> PLet <$> blockOfMany let_ <* token Lexer.In <*> term
 -- withIndentationBlock - sets the col
 ||| Start an indented block and run parser in it
 export
 startBlock : Parser a -> Parser a
 startBlock (P p) = P $ \toks,com,(l,c) => case toks of
  [] => p toks com (l,c)
  (t :: ts) =>
    let (tl,tc) = start t
    in p toks com (tl,tc)
 ||| Assert that parser starts at our current column by
 ||| checking column and then updating line to match the current
 export
 sameLevel : Parser a -> Parser a
 sameLevel (P p) = P $ \toks,com,(l,c) => case toks of
  [] => p toks com (l,c)
  (t :: ts) => 
    let (tl,tc) = start t 
    in if tc == c then p toks com (tl, c)
    else if c < tc then Fail (E "unexpected indent") toks com
    else Fail (E "unexpected indent") toks com
 export
 someSame : Parser a -> Parser (List a)
 someSame = some . sameLevel
 ||| requires a token to be indented?
 indented : Parser a -> Parser a
 indented (P p) = P $ \toks,com,(l,c) => case toks of
  [] => p toks com (l,c)
  (t::ts) =>
    let (tl,tc) = start t
    in if tc > c || tl == l then p toks com (l,c)
    else Fail (E "unexpected outdent") toks com
--- a/src/Lib/Test.idr
+++ b/src/Lib/Test.idr
@@ -0,0 +1,23 @@
 import Text.Lexer
 data Kind = Ignore | Ident | Oper | Number
 ignore : WithBounds (Token Kind) -> Bool
 ignore (MkBounded (Tok Ignore _) _ _) = True
 ignore _ = False
 tmap : TokenMap (Token Kind)
 tmap = [
  (alpha <+> many alphaNum, Tok Ident),
  (some digit, Tok Number),
  (some symbol, Tok Oper),
  (spaces, Tok Ignore)
 ]
 show : WithBounds (Token Kind) -> String
 show t@(MkBounded (Tok _ v) _ _) = "\{show $ start t} \{show v}"
 main : IO ()
 main = do
  let toks = filter (not . ignore) $ fst $ lex tmap  "let x = 1\n    y = 2\n in x + y"
  traverse_ (putStrLn . show) toks
--- a/src/Lib/Tok2.idr
+++ b/src/Lib/Tok2.idr
@@ -0,0 +1,55 @@
 module Lib.Tok2
 import Text.Bounded
 data Kind 
  = Ident 
  | Keyword 
  | Oper 
  | Number 
  | Symbol
  | Arrow
  | Ignore
  | EOF
 data Token = Tok Kind String
 Show Kind where
  show Ident   = "Ident"
  show Keyword = "Keyword"
  show Oper    = "Oper"
  show Number  = "Number"
  show Symbol  = "Symbol"
  show Arrow   = "Arrow"
  show Ignore  = "Ignore"
  show EOF = "EOF"
 Show Token where show (Tok k v) = "<\{show k}:\{show v}>"
 keywords : List String
 keywords = ["let", "in", "where", "case"]
 specialOps : List String
 specialOps = ["->", ":"]
 checkKW : String -> Token
 checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
 opkind : String -> Kind
 opkind "->" = Arrow
 opkind _    = Oper
 isOpChar : Char -> Bool
 isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
 nextToken : Int -> Int -> List Char -> Maybe (Token, (Int, Int, List Char))
 nextToken row col xs = case xs of
  []  => Nothing
  (' ' :: cs) =>  nextToken row (col + 1) cs
  (c :: cs)   =>
    if isDigit c then getTok (Tok Number) isDigit [c] cs
    else if isOpChar c then getTok else ?Foo
  where
    getTok : (String -> Token) -> (Char -> Bool) -> List Char -> List Char -> Maybe (Token, (Int, Int, List Char))
--- a/src/Lib/Token.idr
+++ b/src/Lib/Token.idr
@@ -0,0 +1,55 @@
 module Lib.Token
 import public Text.Lexer
 public export
 data Kind 
  = Ident 
  | Keyword 
  | Oper 
  | Number 
  | Symbol
  | Arrow
  | Space
  | LBrace
  | Semi
  | RBrace
 export
 Show Kind where
  show Ident   = "Ident"
  show Keyword = "Keyword"
  show Oper    = "Oper"
  show Number  = "Number"
  show Symbol  = "Symbol"
  show Arrow   = "Arrow"
  show Space  = "Space"
  show LBrace  = "LBrace"
  show Semi    = "Semi"
  show RBrace  = "RBrace"
 export
 Eq Kind where
  Ident   == Ident = True
  Keyword == Keyword = True
  Oper    == Oper = True
  Number  == Number = True
  Symbol  == Symbol = True
  Arrow   == Arrow = True
  Space   == Space = True
  LBrace  == LBrace = True
  Semi    == Semi   = True
  RBrace  == RBrace = True
  _ == _ = False
 export
 Show k => Show (Token k) where
  show (Tok k v) = "<\{show k}:\{show v}>"
 public export
 BTok : Type
 BTok = WithBounds (Token Kind)
 export
 value : BTok -> String
 value (MkBounded (Tok _ s) _ _) = s
--- a/src/Lib/Tokenizer.idr
+++ b/src/Lib/Tokenizer.idr
@@ -0,0 +1,54 @@
 module Lib.Tokenizer
 import Text.Lexer
 import Text.Lexer.Tokenizer
 import Lib.Token
 keywords : List String
 keywords = ["let", "in", "where", "case", "of", "data"]
 specialOps : List String
 specialOps = ["->", ":"]
 checkKW : String -> Token Kind
 checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
 opkind : String -> Kind
 opkind "->" = Arrow
 opkind _    = Oper
 isOpChar : Char -> Bool
 isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
 opChar : Lexer
 opChar = pred isOpChar
 -- so Text.Lexer.Core.lex is broken
 -- tmap : TokenMap (Token Kind)
 -- tmap = [
 --   (alpha <+> many alphaNum, checkKW),
 --   (some digit, Tok Number),
 --   (some opChar, \s => Tok (opkind s) s),
 --   (lineComment (exact "--"), Tok Space),
 --   (symbol, Tok Symbol),
 --   (spaces, Tok Space)
 -- ]
 rawTokens : Tokenizer (Token Kind)
 rawTokens
   =  match (alpha <+> many alphaNum) checkKW
  <|> match (some digit) (Tok Number)
  <|> match (some opChar) (\s => Tok (opkind s) s)
  <|> match (lineComment (exact "--")) (Tok Space)
  <|> match symbol (Tok Symbol)
  <|> match spaces (Tok Space)
 notSpace : WithBounds (Token Kind) -> Bool
 notSpace (MkBounded (Tok Space _) _ _) = False
 notSpace _ = True
 export
 tokenise : String -> List BTok
 tokenise = filter notSpace . fst . lex rawTokens 
--- a/src/Lib/Tokenizer2.idr
+++ b/src/Lib/Tokenizer2.idr
@@ -0,0 +1,78 @@
 module Lib.Tokenizer2
 -- Starting to write from-scratch tokenizer to flatten out the data a little.
 data Position = Pos Int Int
 inc : Position -> Position
 inc (Pos l c) = Pos l (c + 1)
 public export
 data Kind 
  = Ident 
  | Keyword 
  | Oper 
  | Comment
  | Number 
  | Symbol
  | Arrow
  -- virtual stuff deprecated?
  | LBrace
  | Semi
  | RBrace
  | EOF
 data Token = Tok Int Int Kind String
 data Lexer
  = Match (Char -> Bool)
  | Seq Lexer 
 keywords : List String
 keywords = ["let", "in", "where", "case", "of", "data"]
 specialOps : List String
 specialOps = ["->", ":"]
 -- checkKW : String -> Token Kind
 -- checkKW s = if elem s keywords then Tok Keyword s else Tok Ident s
 opkind : String -> Kind
 opkind "->" = Arrow
 opkind _    = Oper
 isOpChar : Char -> Bool
 isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
 -- opChar : Lexer
 -- opChar = pred isOpChar
 token : Int -> Int -> List Char -> (Token, List Char)
 token l c cs = case cs of
  [] => (Tok l c EOF "",[])
  ('-' :: '-' :: cs) => scan (/= '\n') ['-','-'] l (c+2) cs
  _ => ?fixme
  where
    scan : (Char -> Bool) -> (List Char) -> Int -> Int -> List Char -> (Token,List Char)
 -- match could be a Maybe thing and use the maybe Alt impl.
 -- Not sure what shape I want here.  Maybe a straight up Character Parser and drop tokenization.
 -- rawTokens : Tokenizer (Token Kind)
 -- rawTokens
 --    =  match (alpha <+> many alphaNum) checkKW
 --   <|> match (some digit) (Tok Number)
 --   <|> match (some opChar) (\s => Tok (opkind s) s)
 --   <|> match (lineComment (exact "--")) (Tok Space)
 --   <|> match symbol (Tok Symbol)
 --   <|> match spaces (Tok Space)
 -- export
 -- tokenise : String -> List BTok
 -- tokenise = filter notSpace . fst . lex rawTokens 
--- a/src/Main.idr
+++ b/src/Main.idr
@@ -0,0 +1,28 @@
 module Main
 import Lib.Tokenizer
 import Lib.Layout
 import Lib.Token
 src = "let x = 1\n    y = 2\n in x + y"
 check : String -> IO ()
 check src = do
  putStrLn "--"
  printLn $ src
  let toks = tokenise src
  printLn $ toks
  let toks2 = layout toks
  printLn $ map value toks2
 main : IO ()
 main = do
  -- this stuff is working with layout, should I bother with col.
  -- downside is that it will be somewhat picky about the indentation of `in`
  -- The sixty stuff looks promising.  I might want my own tokenizer though.
  check "let x = 1\n    y = 2\n in x + y"
  check "let x = 1 in x + 2"
  check "case x of\n   True => something\n   False => let\n      x = 1\n      y = 2\n    in x + y"
--- a/src/Syntax.idr
+++ b/src/Syntax.idr
@@ -0,0 +1,58 @@
 module Syntax
 -- Good enough start, lets parse
 -- This is informed by pi-forall and others and is somewhat low level
 Name = String
 data Term : Type where
 TyTerm = Term
 data Literal = LString String | LInt Int | LBool Bool
 data RigCount = Rig0 | RigW
 data Plicity = Implicit | Explicit | Eq
 public export
 data Pattern
  = PatVar Name
  | PatCon Name (List (Pattern, RigCount))
  | PatWild
  | PatLit Literal
 -- could be a pair, but I suspect stuff will be added?
 public export
 data CaseAlt = MkAlt Pattern Term
 public export
 data Term
  = Var Name
  | Ann Term TyTerm
  | Lit Literal
  | Let Name Term Term
  | Pi Name Plicity Term Term
  | App Term Term
  | Lam Pattern Term
  | Case Term (List CaseAlt)
  | Wildcard
  | ParseError String
 -- derive some stuff - I'd like json, eq, show, ...
 data Decl : Type where
 Telescope: Type
 Telescope = List Decl -- pi-forall, always typeSig? 
 data ConstrDef = MkCDef Name Telescope
 data Decl
  = TypeSig Name RigCount TyTerm
  | Def Name Term
  | Data Name Telescope (List ConstrDef)
 record Module where
  constructor MkModule
  name : Name
  imports : List Name
  decls : List Decl