Files
newt/src/Lib/Compile.newt

472 lines
20 KiB
Agda
Raw Blame History

-- TODO Audit how much "outside" stuff could pile up in the continuation. (How much might be repeated in case branches.)
-- TODO consider inlining constructors here (a future inline process might do this anyway).
-- TODO consider not emitting null in `LitObject` (after inlining constructors)
module Lib.Compile
import Prelude
import Lib.Common
import Lib.Types
import Lib.Eval
import Lib.Prettier
import Lib.CompileExp
import Lib.TopContext
import Lib.LiftWhere
import Lib.LiftLambda
import Lib.TCO
import Lib.Ref2
import Lib.Erasure
import Data.String
import Data.Int
import Data.SortedMap
import Data.IORef
data StKind = Plain | Return | Assign String
JSStmt : StKind -> U
JSExp : U
data JAlt : U where
JConAlt : e. Nat -> JSStmt e -> JAlt
JDefAlt : e. JSStmt e -> JAlt
JLitAlt : e. JSExp -> JSStmt e -> JAlt
data JSExp : U where
LitArray : List JSExp -> JSExp
LitObject : List (String × JSExp) -> JSExp
LitString : String -> JSExp
LitInt : Int -> JSExp
Apply : JSExp -> List JSExp -> JSExp
Var : String -> JSExp
JLam : List String -> JSStmt Return -> JSExp
JPrimOp : String JSExp JSExp JSExp
JUndefined : JSExp
Index : JSExp -> JSExp -> JSExp
Dot : JSExp -> String -> JSExp
Raw : String -> JSExp
data JSStmt : StKind -> U where
-- Maybe make this a snoc...
JSnoc : a. JSStmt Plain -> JSStmt a -> JSStmt a
JPlain : JSExp -> JSStmt Plain
JConst : (nm : String) -> JSExp -> JSStmt Plain
JReturn : JSExp -> JSStmt Return
JLet : (nm : String) -> JSStmt (Assign nm) -> JSStmt Plain -- need somebody to assign
JAssign : (nm : String) -> JSExp -> JSStmt (Assign nm)
-- TODO - switch to Int tags
JCase : a. JSExp -> List JAlt -> JSStmt a
-- throw can't be used
JError : a. String -> JSStmt a
Cont : StKind U
Cont e = JSExp -> JSStmt e
-- JSEnv contains `Var` for binders or `Dot` for destructured data. It
-- used to translate binders
record JSEnv where
constructor MkEnv
jsenv : List JSExp
-- This is not depth, it is incremented as we go down the tree to get fresh names
depth : Int
-- this was like this, are we not using depth?
push : JSEnv -> JSExp -> JSEnv
push (MkEnv env depth) exp = MkEnv (exp :: env) depth
incr : JSEnv JSEnv
incr env = MkEnv env.jsenv (1 + env.depth)
emptyJSEnv : JSEnv
emptyJSEnv = MkEnv Nil 0
litToJS : Literal -> JSExp
litToJS (LString str) = LitString str
litToJS (LChar c) = LitString $ pack (c :: Nil)
litToJS (LInt i) = LitInt i
-- Stuff nm.h1, nm.h2, ... into environment
mkEnv : JSExp -> Int -> JSEnv -> List String -> JSEnv
mkEnv nm k env Nil = env
mkEnv nm k env (x :: xs) = mkEnv nm (1 + k) (push env (Dot nm "h\{show k}")) xs
envNames : Env -> List String
-- given a name, find a similar one that doesn't shadow in Env
freshName : String -> JSEnv -> String
freshName nm env = if free env.jsenv nm then nm else go nm 1
where
free : List JSExp -> String -> Bool
free Nil nm = True
free (Var n :: xs) nm = if n == nm then False else free xs nm
free (_ :: xs) nm = free xs nm
go : String -> Int -> String
go nm k = let nm' = "\{nm}\{show k}" in if free env.jsenv nm' then nm' else go nm (1 + k)
freshName' : String -> JSEnv -> (String × JSEnv)
freshName' nm env =
let nm' = freshName nm env -- "\{nm}$\{show $ length env}"
env' = push env (Var nm')
in (nm', env')
freshNames : List String -> JSEnv -> (List String × JSEnv)
freshNames nms env = go nms env Lin
where
go : List Name -> JSEnv -> SnocList Name -> (List String × JSEnv)
go Nil env acc = (acc <>> Nil, env)
go (n :: ns) env acc =
let (n', env') = freshName' n env
in go ns env' (acc :< n')
-- These expressions are added to the environment rather than assigned to a name
simpleJSExp : JSExp → Bool
simpleJSExp (Var _) = True
simpleJSExp (Dot a _) = simpleJSExp a
simpleJSExp (JUndefined) = True
simpleJSExp (Index a b) = if simpleJSExp a then simpleJSExp b else False
simpleJSExp (LitInt _) = True
simpleJSExp (LitString _) = True
simpleJSExp _ = False
-- This is inspired by A-normalization, look into the continuation monad
-- There is an index on JSStmt, adopted from Stefan Hoeck's code.
--
-- Here we turn a Term into a statement (which may be a sequence of statements), there
-- is a continuation, which turns the final JSExpr into a JSStmt, and the function returns
-- a JSStmt, wrapping recursive calls in JSnoc if necessary.
termToJS : ∀ e. JSEnv -> CExp -> Cont e -> JSStmt e
termToJS env (CBnd k) f = case getAt (cast k) env.jsenv of
(Just e) => f e
Nothing => fatalError "Bad bounds"
termToJS env CErased f = f JUndefined
termToJS env (CRaw str _) f = f (Raw str)
termToJS env (CLam nm t) f =
let (nm',env') = freshName' nm env -- "\{nm}$\{show $ length env}"
in f $ JLam (nm' :: Nil) (termToJS env' t JReturn)
termToJS env (CFun nms t) f =
let (nms', env') = freshNames nms env
in f $ JLam nms' (termToJS env' t JReturn)
termToJS env (CRef nm) f = f $ Var (show nm)
termToJS env (CPrimOp op t u) f = termToJS env t $ \ t => termToJS env u $ \ u => f $ JPrimOp op t u
termToJS env (CMeta k) f = f $ LitString "META \{show k}"
termToJS env (CLit lit) f = f (litToJS lit)
-- if it's a var, just use the original
termToJS env (CLet nm (CBnd k) u) f = case getAt (cast k) env.jsenv of
Just e => termToJS (push env e) u f
Nothing => fatalError "bad bounds"
-- For a let, we run with a continuation to JAssign to a pre-declared variable
-- if JAssign comes back out, we either push the JSExpr into the environment or JConst it,
-- depending on complexity. Otherwise, stick the declaration in front.
termToJS env (CLet nm t u) f =
let nm' = freshName nm env
env' = push env (Var nm')
in case termToJS env t (JAssign nm') of
(JAssign _ exp) => if simpleJSExp exp
then termToJS (push env exp) u f
else JSnoc (JConst nm' exp) (termToJS env' u f)
t' => JSnoc (JLet nm' t') (termToJS env' u f)
termToJS env (CLetRec nm CErased u) f = termToJS (push env JUndefined) u f
termToJS env (CLetRec nm t u) f =
-- this shouldn't happen if where is lifted
let nm' = freshName nm env
env' = push env (Var nm')
-- If it's a simple term, use const
in case termToJS env' t (JAssign nm') of
(JAssign _ exp) => JSnoc (JConst nm' exp) (termToJS env' u f)
t' => JSnoc (JLet nm' t') (termToJS env' u f)
termToJS env (CConstr ix _ args) f = go args 0 (\ args => f $ LitObject (("tag", LitInt (cast ix)) :: args))
where
go : e. List CExp -> Int -> (List (String × JSExp) -> JSStmt e) -> JSStmt e
go Nil ix k = k Nil
go (t :: ts) ix k = termToJS env t $ \ t' => go ts (ix + 1) $ \ args => k $ ("h\{show ix}", t') :: args
termToJS env (CAppRef nm args etas) f = termToJS env (CRef nm) (\ t' => (argsToJS env t' args Lin f))
where
etaExpand : JSEnv -> Nat -> SnocList JSExp -> JSExp -> JSExp
etaExpand env Z args tm = Apply tm (args <>> Nil)
etaExpand env (S etas) args tm =
let nm' = freshName "eta" env
env' = push env (Var nm')
in JLam (nm' :: Nil) $ JReturn $ etaExpand (push env (Var nm')) etas (args :< Var nm') tm
argsToJS : e. JSEnv -> JSExp -> List CExp -> SnocList JSExp -> (JSExp -> JSStmt e) -> JSStmt e
argsToJS env tm Nil acc k = k (etaExpand env (cast etas) acc tm)
argsToJS env tm (x :: xs) acc k = termToJS env x (\ x' => argsToJS (incr env) tm xs (acc :< x') k)
termToJS env (CApp t arg) f = termToJS env t (\ t' => termToJS env arg (\arg' => f (Apply t' (arg' :: Nil))))
termToJS {e} env (CCase t alts) f =
termToJS env t $ \case
(Var nm) => maybeCaseStmt env (Var nm) alts
t' => do
-- TODO with inlining, we hit cases where the let gets pulled forward more than once
-- two cases as separate args, se we need actual unique names. For now, we're calling
-- incr when processing App, as a stopgap, we probably need a fresh names state monad
let nm = "_sc$\{show env.depth}"
-- increment the bit that goes into the name
let env' = incr env
if simpleJSExp t'
then (maybeCaseStmt env' t' alts)
else JSnoc (JConst nm t') (maybeCaseStmt env' (Var nm) alts)
where
termToJSAlt : JSEnv -> JSExp -> CAlt -> JAlt
termToJSAlt env nm (CConAlt ix name info args u) = JConAlt ix (termToJS (mkEnv nm 0 env args) u f)
-- intentionally reusing scrutinee name here
termToJSAlt env nm (CDefAlt u) = JDefAlt (termToJS (env) u f)
termToJSAlt env nm (CLitAlt lit u) = JLitAlt (litToJS lit) (termToJS env u f)
maybeCaseStmt : JSEnv -> JSExp -> List CAlt -> JSStmt e
-- If there is a single alt, assume it matched
maybeCaseStmt env nm ((CConAlt _ _ info args u) :: Nil) = (termToJS (mkEnv nm 0 env args) u f)
maybeCaseStmt env nm alts@(CLitAlt _ _ :: _) =
(JCase nm (map (termToJSAlt env nm) alts))
maybeCaseStmt env nm alts =
(JCase (Dot nm "tag") (map (termToJSAlt env nm) alts))
jsKeywords : List String
jsKeywords = (
"break" :: "case" :: "catch" :: "continue" :: "debugger" :: "default" :: "delete" :: "do" :: "else" ::
"finally" :: "for" :: "function" :: "if" :: "in" :: "instanceof" :: "new" :: "return" :: "switch" ::
"this" :: "throw" :: "try" :: "typeof" :: "var" :: "void" :: "while" :: "with" ::
"class" :: "const" :: "enum" :: "export" :: "extends" :: "import" :: "super" ::
"implements" :: "class" :: "let" :: "package" :: "private" :: "protected" :: "public" ::
"static" :: "yield" ::
"null" :: "true" :: "false" ::
-- might not occur now that we have namespaces on the names
"String" :: "Number" :: "Array" :: "BigInt" :: Nil)
-- escape identifiers for js
jsIdent : String -> Doc
jsIdent id = if elem id jsKeywords then text ("$" ++ id) else text $ pack $ fix (unpack id)
where
fix : List Char -> List Char
fix Nil = Nil
fix (x :: xs) =
if isAlphaNum x || x == '_' then
x :: fix xs
-- make qualified names more readable
else if x == '.' then '_' :: fix xs
else if x == '$' then
'$' :: '$' :: fix xs
else
'$' :: (toHex (cast x)) ++ fix xs
stmtToDoc : e. JSStmt e -> Doc
expToDoc : JSExp -> Doc
expToDoc (LitArray xs) = fatalError "TODO - LitArray to doc"
expToDoc (LitObject xs) = text "{" <+> folddoc (\ a e => a ++ text ", " <+/> e) (map entry xs) <+> text "}"
where
entry : (String × JSExp) -> Doc
entry (nm, exp) = jsIdent nm ++ text ":" <+> expToDoc exp
expToDoc (LitString str) = text $ quoteString str
expToDoc (LitInt i) = text $ show i
expToDoc (Raw str) = text str
-- TODO add precedence
expToDoc (Apply x@(JLam _ _) xs) = text "(" ++ expToDoc x ++ text ")" ++ text "(" ++ nest 2 (commaSep (map expToDoc xs)) ++ text ")"
expToDoc (Apply x xs) = expToDoc x ++ text "(" ++ nest 2 (commaSep (map expToDoc xs)) ++ text ")"
expToDoc (Var nm) = jsIdent nm
expToDoc (JLam nms (JReturn exp)) = text "(" <+> commaSep (map jsIdent nms) <+> text ") =>" <+> text "(" ++ expToDoc exp ++ text ")"
expToDoc (JLam nms body) = text "(" <+> commaSep (map jsIdent nms) <+> text ") =>" <+> bracket "{" (stmtToDoc body) "}"
expToDoc JUndefined = text "null"
expToDoc (Index obj ix) = expToDoc obj ++ text "[" ++ expToDoc ix ++ text "]"
expToDoc (Dot obj nm) = expToDoc obj ++ text "." ++ jsIdent nm
expToDoc (JPrimOp op t u) = parens 0 1 (expToDoc t) <+> text op <+> parens 0 1 (expToDoc u)
caseBody : e. JSStmt e -> Doc
caseBody stmt@(JReturn x) = nest 2 (line ++ stmtToDoc stmt)
caseBody {e} stmt@(JCase _ _) = nest 2 (line ++ stmtToDoc stmt </> text "break;")
caseBody stmt = line ++ text "{" ++ nest 2 (line ++ stmtToDoc stmt </> text "break;") </> text "}"
altToDoc : JAlt -> Doc
altToDoc (JConAlt nm stmt) = text "case" <+> text (show nm) ++ text ":" ++ caseBody stmt
altToDoc (JDefAlt stmt) = text "default" ++ text ":" ++ caseBody stmt
altToDoc (JLitAlt a stmt) = text "case" <+> expToDoc a ++ text ":" ++ caseBody stmt
stmtToDoc (JSnoc x y) = stmtToDoc x </> stmtToDoc y
stmtToDoc (JPlain x) = expToDoc x ++ text ";"
-- I might not need these split yet.
stmtToDoc (JLet nm body) = text "let" <+> jsIdent nm ++ text ";" </> stmtToDoc body
stmtToDoc (JAssign nm expr) = jsIdent nm <+> text "=" <+> expToDoc expr ++ text ";"
stmtToDoc (JConst nm x) = text "const" <+> jsIdent nm <+> nest 2 (text "=" <+/> expToDoc x ++ text ";")
stmtToDoc (JReturn x) = text "return" <+> expToDoc x ++ text ";"
stmtToDoc (JError str) = text "throw new Error(" ++ text (quoteString str) ++ text ");"
stmtToDoc (JCase sc alts) =
text "switch (" ++ expToDoc sc ++ text ")" <+> bracket "{" (stack $ map altToDoc alts) "}"
-- use iife to turn stmts into expr
maybeWrap : JSStmt Return -> JSExp
maybeWrap (JReturn exp) = exp
maybeWrap stmt = Apply (JLam Nil stmt) Nil
cexpToDoc : (QName × CExp) -> Doc
cexpToDoc (qn, ct) =
-- If we leak extra statements/assignments, we need an IIFE
let exp = maybeWrap $ termToJS emptyJSEnv ct JReturn
in stmtToDoc $ JConst (show qn) exp
-- Collect the QNames used in a term
getNames : Tm -> List QName -> List QName
getNames (Ref x nm) acc = nm :: acc
getNames (Lam x str _ _ t) acc = getNames t acc
getNames (App x t u) acc = getNames u $ getNames t acc
getNames (Pi x str icit y t u) acc = getNames u $ getNames t $ QN primNS "PiType" :: acc
getNames (Let x str t u) acc = getNames u $ getNames t acc
getNames (LetRec x str _ t u) acc = getNames u $ getNames t acc
getNames (Case x t alts) acc = getNames t $ foldl getAltNames acc alts
where
getAltNames : List QName -> CaseAlt -> List QName
getAltNames acc (CaseDefault t) = getNames t acc
getAltNames acc (CaseCons name args t) = name :: getNames t acc
getAltNames acc (CaseLit lit t) = getNames t acc
getNames _ acc = acc
-- returns a QName -> Def of in-use entries
-- This will be what we work on for optimization passes
getEntries : SortedMap QName Def QName M (SortedMap QName Def)
getEntries acc name = do
top <- getTop
case lookup name top of
Nothing => do
putStrLn "bad name \{show name}"
pure acc
Just (MkEntry _ name type def@(Fn exp) _) => case lookupMap' name acc of
Just _ => pure acc
Nothing => do
top <- getTop
exp <- zonk top 0 Nil exp
let acc = updateMap name (Fn exp) acc
foldlM getEntries acc $ getNames exp Nil
Just (MkEntry _ name type def@(PrimFn _ _ used) _) =>
let acc = updateMap name def acc in
foldlM getEntries acc used
Just entry => pure $ updateMap name entry.def acc
/-
## Sort names by dependencies
This code is _way too subtle. The problem goes away if I wrap `() =>` around 0-ary top level functions. But I'm
stubborn, so I'm giving it a try. Changing codegen may repeatedly break this and require switching to `() =>`.
The idea here is to get a list of names to emit in order of dependencies. But top level 0-ary functions can reference
and call names at startup. They can't reference something that hasn't been declared yet and can't call something that
hasn't been defined.
As an example, a recursive show instance:
- References the `show` function
- `show` function references the instance under a lambda (not inlining this yet)
- We hit the bare function first, it depends on the instance (because of recursion), which depends on the
function, but loop prevention cuts.
We have main at the top, it is called so we visit it deep. We do a depth-first traversal, but will distinguish whether
we're visiting shallow or deep. We're trying to avoid hitting issues with indirect circular references.
- Anything we visit deep, we ensure is visited shallow first
- Shallow doesn't go into function bodies, but we do go into lambdas
- Anything invoked with arguments while shallow is visited deep, anything referenced or partially applied is still shallow.
- We keep track of both shallow and deep visits in our accumuulator
- Shallow represents the declaration, so we filter to those at the end
TODO this could be made faster by keeping a map of the done information
-/
sortedNames : SortedMap QName CExp QName List QName
sortedNames defs qn = map snd $ filter (not fst) $ go Nil Nil (True, qn)
where
getBody : CAlt CExp
getBody (CConAlt _ _ _ _ t) = t
getBody (CLitAlt _ t) = t
getBody (CDefAlt t) = t
-- deep if this qn is being applied to something
getNames : (deep : Bool) List (Bool × QName) CExp List (Bool × QName)
-- liftIO calls a lambda statically
getNames deep acc (CLam _ t) = getNames deep acc t
-- top level 0-ary function, doesn't happen
getNames deep acc (CFun _ t) = if deep then getNames deep acc t else acc
-- 0-ary call is not deep invocation
getNames deep acc (CAppRef nm Nil 0) = (True, nm) :: acc
-- full call is deep ref to the head, arguments may be applied by `nm`
getNames deep acc (CAppRef nm ts 0) = foldl (getNames True) ((True, nm) :: acc) ts
-- non-zero are closures
getNames deep acc (CAppRef nm ts _) = foldl (getNames deep) ((deep, nm) :: acc) ts
-- True is needed for an issue in the parser. symbol -> keyword -> indented
-- TODO look at which cases generate CApp
getNames deep acc (CApp t u) = getNames True (getNames deep acc u) t
getNames deep acc (CCase t alts) = foldl (getNames deep) acc $ t :: map getBody alts
-- we're not calling it
getNames deep acc (CRef qn) = (deep, qn) :: acc
getNames deep acc (CLet _ t u) = getNames deep (getNames deep acc t) u
getNames deep acc (CLetRec _ t u) = getNames deep (getNames deep acc t) u
getNames deep acc (CConstr _ _ ts) = foldl (getNames deep) acc ts
-- if the CRaw is called, then the deps are called
getNames deep acc (CRaw _ deps) = map (_,_ deep) deps ++ acc
-- wrote these out so I get an error when I add a new constructor
getNames deep acc (CLit _) = acc
getNames deep acc (CMeta _) = acc
getNames deep acc (CBnd _) = acc
getNames deep acc CErased = acc
getNames deep acc (CPrimOp op t u) = getNames deep (getNames deep acc t) u
-- recurse on all dependencies, pushing onto acc
go : List (Bool × QName) List (Bool × QName) (Bool × QName) List (Bool × QName)
-- Need to force shallow if we're doing deep and haven't done shallow.
go loop acc this@(deep, qn) =
-- there is a subtle issue here with an existing (False, qn) vs (True, qn)
let acc = if deep && not (elem (False, qn) acc) && not (elem (False, qn) loop)
then go loop acc (False, qn)
else acc
in if elem this loop
then acc
else if elem this acc then acc
else
case lookupMap' qn defs of
Nothing => acc -- only `bouncer`
Just exp => this :: foldl (go $ this :: loop) acc (getNames deep Nil exp)
eraseEntries : {{Ref2 Defs St}} M Unit
eraseEntries = do
defs <- getRef Defs
ignore $ traverse go $ toList defs
where
go : {{Ref2 Defs St}} (QName × Def) M Unit
go (qn, Fn tm) = do
tm' <- erase Nil tm Nil
modifyRef Defs $ updateMap qn (Fn tm')
go _ = pure MkUnit
-- given a initial function, return a dependency-ordered list of javascript source
process : QName M (List Doc)
process name = do
top <- getTop
entries <- getEntries emptyMap name
-- Maybe move this dance into liftWhere
ref <- newIORef entries
let foo = MkRef ref -- for the autos below
eraseEntries
liftWhere
entries <- readIORef ref
-- Now working with defs
exprs <- mapM defToCExp $ toList entries
let cexpMap = foldMap const emptyMap exprs
cexpMap <- tailCallOpt cexpMap
-- Not needed for JS, uncomment to test
-- cexpMap <- liftLambda cexpMap
let names = sortedNames cexpMap name
pure $ map cexpToDoc $ mapMaybe (\x => lookupMap x cexpMap) names
compile : M (List Doc)
compile = do
top <- getTop
case lookupRaw "main" top of
Just (MkEntry fc name type def _) => do
tmp <- process name
-- tack on call to main function
let exec = stmtToDoc $ JPlain $ Apply (Var $ show name) Nil
pure $ reverse (exec :: tmp)
Nothing =>
-- TODO maybe emit everything if there is no main
error emptyFC "No main function found"