chkpt

2022-09-08 22:45:07 -07:00
commit 39deff1465
13 changed files with 767 additions and 0 deletions
--- a/src/Lib/Tokenizer.idr
+++ b/src/Lib/Tokenizer.idr
@@ -0,0 +1,54 @@
+module Lib.Tokenizer
+
+import Text.Lexer
+import Text.Lexer.Tokenizer
+import Lib.Token
+
+keywords : List String -- reserved words: checkKW tags a matching lexeme as Keyword instead of Ident
+keywords = ["let", "in", "where", "case", "of", "data"]
+
+specialOps : List String -- NOTE(review): not referenced in the visible part of this module; opkind special-cases only "->"
+specialOps = ["->", ":"]
+
+checkKW : String -> Token Kind -- classify an identifier-shaped lexeme: Keyword if it is in keywords, otherwise Ident
+checkKW word = Tok (if word `elem` keywords then Keyword else Ident) word
+
+opkind : String -> Kind -- token kind for an operator lexeme: Arrow for "->", Oper for anything else
+opkind op =
+  if op == "->" then Arrow else Oper
+
+isOpChar : Char -> Bool -- True when the character is one of the operator characters listed in the literal below
+isOpChar ch = any (== ch) (unpack ":!#$%&*+./<=>?@\\^|-~")
+
+opChar : Lexer -- recognises a single character accepted by isOpChar
+opChar = pred isOpChar
+
+-- Text.Lexer.Core.lex is broken, so the TokenMap-based lexer below stays commented out (rawTokens is used instead)
+-- tmap : TokenMap (Token Kind)
+-- tmap = [
+--   (alpha <+> many alphaNum, checkKW),
+--   (some digit, Tok Number),
+--   (some opChar, \s => Tok (opkind s) s),
+--   (lineComment (exact "--"), Tok Space),
+--   (symbol, Tok Symbol),
+--   (spaces, Tok Space)
+-- ]
+
+rawTokens : Tokenizer (Token Kind) -- alternatives are tried top to bottom; the first rule that matches wins
+rawTokens
+   =  match (alpha <+> many alphaNum) checkKW
+  <|> match (some digit) (Tok Number)
+  <|> match (lineComment (exact "--")) (Tok Space) -- must precede the operator rule: '-' is an operator character, so "--" would otherwise lex as Oper
+  <|> match (some opChar) (\s => Tok (opkind s) s)
+  <|> match symbol (Tok Symbol)
+  <|> match spaces (Tok Space)
+
+notSpace : WithBounds (Token Kind) -> Bool -- False for a bounded token whose kind is Space, True for every other token
+notSpace (MkBounded tok _ _) =
+  case tok of { Tok Space _ => False; _ => True }
+
+export -- lex a source string with rawTokens and drop the Space tokens; the second component of lex's result is discarded
+tokenise : String -> List BTok
+tokenise src = filter notSpace (fst (lex rawTokens src))
+
+