This commit is contained in:
2022-09-08 22:45:07 -07:00
commit 39deff1465
13 changed files with 767 additions and 0 deletions

54
src/Lib/Tokenizer.idr Normal file
View File

@@ -0,0 +1,54 @@
module Lib.Tokenizer
import Text.Lexer
import Text.Lexer.Tokenizer
import Lib.Token
||| Reserved words of the language. `checkKW` tags an alphanumeric lexeme as a
||| `Keyword` when it appears in this list and as an `Ident` otherwise.
keywords : List String
keywords =
  [ "let"
  , "in"
  , "where"
  , "case"
  , "of"
  , "data"
  ]
||| Operator spellings singled out as special.
||| NOTE(review): nothing in the visible part of this module refers to this
||| list; `opkind` matches "->" directly instead.
specialOps : List String
specialOps =
  [ "->"
  , ":"
  ]
||| Build the token for an alphanumeric lexeme: a `Keyword` token when the text
||| is one of `keywords`, an `Ident` token otherwise. The text is kept as-is.
checkKW : String -> Token Kind
checkKW s = Tok kind s
  where
    -- Only the kind depends on the keyword lookup; the payload is always `s`.
    kind : Kind
    kind = if elem s keywords then Keyword else Ident
opkind : String -> Kind
opkind "->" = Arrow
opkind _ = Oper
||| True when `c` is one of the characters that may appear in an operator.
isOpChar : Char -> Bool
isOpChar c = elem c operatorChars
  where
    -- The full set of operator characters, spelled out as a string literal.
    operatorChars : List Char
    operatorChars = unpack ":!#$%&*+./<=>?@\\^|-~"
||| Lexer built from the `isOpChar` predicate, i.e. it recognises a single
||| operator character.
opChar : Lexer
opChar = pred (\c => isOpChar c)
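-- Text.Lexer.Core.lex appeared to be broken when this was written, so the
-- TokenMap-based definition below is commented out and the Tokenizer-based
-- `rawTokens` is used instead.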
-- tmap : TokenMap (Token Kind)
-- tmap = [
-- (alpha <+> many alphaNum, checkKW),
-- (some digit, Tok Number),
-- (some opChar, \s => Tok (opkind s) s),
-- (lineComment (exact "--"), Tok Space),
-- (symbol, Tok Symbol),
-- (spaces, Tok Space)
-- ]
||| Tokenizer for the surface syntax.
|||
||| Alternatives joined with `<|>` are tried top to bottom and the first rule
||| that matches wins (there is no longest-match arbitration), so a more
||| specific rule has to appear before any more general rule that could match
||| the same prefix.
|||
||| Rules, in order:
|||   * identifiers / keywords: a letter followed by letters or digits
|||   * numbers: one or more digits
|||   * line comments starting with "--", emitted as `Space` tokens
|||   * operators: one or more operator characters (see `isOpChar`)
|||   * any other single symbol character
|||   * runs of whitespace, emitted as `Space` tokens
rawTokens : Tokenizer (Token Kind)
rawTokens
   =  match (alpha <+> many alphaNum) checkKW
  <|> match (some digit) (Tok Number)
  -- The comment rule must precede the operator rule: '-' is an operator
  -- character, so `some opChar` would otherwise always consume the leading
  -- "--" and the comment rule could never fire (the comment text would then
  -- be lexed as ordinary tokens). The trade-off is that an operator spelling
  -- that begins with "--" is now read as the start of a comment.
  <|> match (lineComment (exact "--")) (Tok Space)
  <|> match (some opChar) (\s => Tok (opkind s) s)
  <|> match symbol (Tok Symbol)
  <|> match spaces (Tok Space)
||| Predicate used to drop layout-only tokens: False for a bounded token whose
||| kind is `Space`, True for everything else.
notSpace : WithBounds (Token Kind) -> Bool
notSpace tok = case tok of
  MkBounded (Tok Space _) _ _ => False
  _ => True
||| Turn source text into a list of bounded tokens with all `Space` tokens
||| removed.
|||
||| Only the first component of `lex`'s result (the tokens) is used; the second
||| component is discarded, so this function does not report whether the whole
||| input was consumed.
export
tokenise : String -> List BTok
tokenise input = filter notSpace allTokens
  where
    -- Every token produced by the tokenizer, `Space` tokens included.
    allTokens : List BTok
    allTokens = fst (lex rawTokens input)