partially working

This commit is contained in:
2026-03-05 22:14:17 -08:00
parent bb510fea4a
commit 7518c95769
9 changed files with 25288 additions and 10048 deletions

View File

@@ -5,4 +5,4 @@ Work in progress.
Not completely accurate, intended for editor use. We're ignoring the existence of mixfix and parsing as an app list.
The layout is doing the Haskell fake token thing for now.
The layout is doing the Haskell fake token thing for now. I'm not sure it's doing what I want, though. Newt will kill tokens if they're out of indent. Will tree-sitter not ask for START if it's not expected? Maybe we return one anyway.

View File

@@ -41,8 +41,8 @@ module.exports = grammar({
seq("/-", /([^-]|-+[^/])-/, "/"),
),
),
_arr: ($) => choice("->", "→"),
number: $ => /\d+/,
lamExpr: $ => seq(
choice("\\", "λ"),
repeat1($.identifier),
@@ -50,16 +50,19 @@ module.exports = grammar({
$.typeExpr
),
// hole, parenTypeExpression, record update
_atom: $ => choice($.varname, $.strLit, $.operator, seq("(", $.typeExpr, ")")),
_parg: $ => choice($._atom, seq("{{", $.typeExpr, "}}"), seq("{", $.typeExpr, "}")),
appExpr: $ => seq($._atom, repeat($._parg)),
_atom: $ => choice($.identifier, $.string, $.character, $.number, $.recUpdate, seq("(", $.typeExpr, ")")),
_parg: $ => choice(seq("{{", $.typeExpr, "}}"), seq("{", $.typeExpr, "}"), $._atom),
recUpdate: $ => seq("[", sep(";", seq($.identifier, choice(":=", "$="), $.term)), "]"),
_appExpr: $ => (seq($._atom, repeat($._parg))),
qname: ($) => sep1(".", $.identifier),
strLit: $ => /"[^"]*"/,
doCaseLet: $ => seq("let", "(", $.term, ")", "=", $.typeExpr,
layout($, $._orAlt)),
string: $ => /"[^"]*"/,
character: $ => /'(\\)?.'/,
doCaseLet: $ => seq("let", "(", $.term, ")", "=", $.typeExpr, repeat($.orAlt)),
caseAlt: $ => seq($.term, "=>", $.term),
_orAlt: $ => seq("|", $.caseAlt),
_doArrow: $ => seq("<-", $.typeExpr, optional(layout($, $._orAlt))),
orAlt: $ => seq("|", $.caseAlt),
// layout was causing trouble here. I kinda wanted to ditch it, but there
// could be a shift/reduce thing in the real parser
_doArrow: $ => seq("<-", $.typeExpr, repeat($.orAlt)),
doArrow: $ => seq($.term, optional($._doArrow)),
doLet: $ => seq("let", $.identifier, "=", $.term),
_doExpr: $ => choice(
@@ -76,26 +79,26 @@ module.exports = grammar({
$.lamExpr,
$.doBlock,
$.ifThen,
$.appExpr,
$._appExpr,
),
term: ($) => prec.right(seq($._term2, repeat(seq("$", $._term2)))),
// varname is ident|uident|_, but we'll gloss over that
varname: ($) => $.identifier,
// the "$" becomes operator and we get past the bit in main, but
// it's going to fail on a "$" \ ...
// why doesn't "$" work here?
dollar: $ => seq("$", $.term),
term: ($) => prec.right(seq($._term2, optional($.dollar))),
// abind/ibind/ebind in Parser.newt
binder: ($) =>
choice(
seq("(", $.identifier, ":", $.typeExpr, ")"),
// seq("(", $.typeExpr, ")"),
// repeat($.identifier) has a conflict
seq("(", alias(optional("0"), "quantity"), $.identifier, ":", $.typeExpr, ")"),
seq("{{", $.typeExpr, "}}"),
seq("{", $.identifier, ":", $.typeExpr, "}"),
seq("{", alias(optional("0"), "quantity"), repeat1($.identifier), ":", $.typeExpr, "}"),
),
_arr: ($) => choice("->", ""),
forall: ($) => seq("", repeat1($.identifier), ".", $.typeExpr),
binders: ($) => seq(choice($.varname, repeat1($.binder)), $._arr, $.typeExpr),
typeExpr: ($) => choice($.forall, $.binders, $.term),
binders: ($) => seq(choice(repeat1($.binder)), $._arr, $.typeExpr),
typeExpr: ($) => prec.right(choice($.forall, $.binders, seq($.term, optional(seq($._arr, $.typeExpr))))),
// pitype: ($) =>
// seq(
@@ -104,37 +107,76 @@ module.exports = grammar({
// $.identifier,
// ),
sigDecl: ($) => seq($.identifier, ":", $.typeExpr),
defDecl: ($) => seq($.appExpr, "=", $.typeExpr),
whereClause: $ => seq("where", layout($, choice($.sigDecl, $.defDecl))),
defDecl: ($) => seq(alias($._appExpr, $.lhs), "=", $.typeExpr, optional($.whereClause)),
shortDataDecl: $ => seq(
"data",
alias($.identifier, "typeName"),
repeat($.identifier),
"=",
sep1("|", seq(alias($.identifier, "conName"), repeat($._atom)))
),
dataDecl: ($) =>
seq(
"data",
$.identifier,
alias($.identifier, "typeName"),
":",
$.typeExpr,
optional(seq("where", layout($, $.conDef))),
// the layout here can be empty (so no start tag)
// optional doesn't seem to help, so we have an error at void
optional(seq("where", optional(layout($, $.sigDecl)))),
),
jsLitString: $ => /`[^`]+`/,
deriveDecl: $ => seq("derive", repeat1($.identifier)),
pfuncDecl: ($) => seq(
"pfunc",
alias($.identifier, "name"),
optional(seq("uses", "(", repeat1($.identifier), ")")),
":",
$.typeExpr,
":=",
$.jsLitString
),
ptypeDecl: $ => seq(
"ptype",
alias($.identifier, $.name),
optional(seq(":", $.typeExpr))
),
importDef: ($) => seq("import", $.qname),
conDef: ($) =>
mixfixDecl: $ => seq(
choice("infixr", "infixl"),
$.number,
repeat1(alias($.identifier, $.name))
),
classDecl: $ =>
seq(
$.identifier, // upper
":",
$.typeExpr
"class",
seq(alias($.identifier, $.className), repeat($._atom)),
"where",
layout($, $.sigDecl)
),
instanceDecl: $ => seq(
"instance",
$.typeExpr,
"where",
layout($, choice($.sigDecl, $.defDecl))
),
_decl: ($) =>
choice(
// mixfixDecl,
// ptypeDecl
// pfuncDecl
$.mixfixDecl,
$.ptypeDecl,
$.pfuncDecl,
$.dataDecl,
// shortDataDecl
// classDecl
// instanceDecl
// recordDecl
// exportDecl
// deriveDecl
$.shortDataDecl,
$.classDecl,
$.instanceDecl,
// $.recordDecl,
// $.exportDecl,
$.deriveDecl,
$.sigDecl,
$.defDecl,
),
colon: _ => ":",
module: ($) =>
seq(
"module",
@@ -142,8 +184,11 @@ module.exports = grammar({
repeat(seq($.semi, $.importDef)),
repeat(seq($.semi, $._decl)),
),
// these are _way_ more generous in newt
operator: ($) => /[!#$%&*+.,/<=>?@\\^|-]+/,
identifier: ($) => /[A-Za-z_][\w']*|[,]|\+\+/,
// oof, sort this out.
// operator: ($) => /xxxx[∘!#$%&*+,./<=>?@^|-]+/,
// Don't think we need this at this point.
// adding "," here does all sorts of harm...
identifier: ($) => /_,_|,|([^()\\{}\[\],.@;\s ])[^()\\{}\[\],.@;\s ]*/,
},
});

21
queries/highlights.scm Normal file
View File

@@ -0,0 +1,21 @@
[
"let"
; "in"
"where"
; "case" "of"
"data"
;"U"
"do"
"ptype" "pfunc"
"module"
; "infixl" "infixr" "infix"
"∀" ; "forall"
"import"
"uses" "derive"
"class" "instance" ;"record" "constructor"
"if" "then" "else"
"|" "<-" "=>" "$" ":"
] @keyword
(comment) @comment
(string) @string
(jsLitString) @string

9
queries/locals.scm Normal file
View File

@@ -0,0 +1,9 @@
; what is all this, should I push it in?
(sigDecl
(identifier) @local.definition)
; @reference.implementation
; @definition.interface
; @definition.class
; @definition.function
; @definition.method
; @reference.class -- not sure I can distinguish this from a constructor ref,

3
queries/textobjects.scm Normal file
View File

@@ -0,0 +1,3 @@
[
(dataDecl)
] @class.around

889
src/grammar.json generated

File diff suppressed because it is too large Load Diff

511
src/node-types.json generated
View File

@@ -1,31 +1,4 @@
[
{
"type": "appExpr",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "operator",
"named": true
},
{
"type": "strLit",
"named": true
},
{
"type": "typeExpr",
"named": true
},
{
"type": "varname",
"named": true
}
]
}
},
{
"type": "binder",
"named": true,
@@ -60,10 +33,6 @@
{
"type": "typeExpr",
"named": true
},
{
"type": "varname",
"named": true
}
]
}
@@ -84,17 +53,53 @@
}
},
{
"type": "conDef",
"type": "classDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "character",
"named": true
},
{
"type": "className",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "number",
"named": true
},
{
"type": "recUpdate",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "sigDecl",
"named": true
},
{
"type": "start",
"named": true
},
{
"type": "string",
"named": true
},
{
"type": "typeExpr",
"named": true
@@ -110,20 +115,16 @@
"multiple": true,
"required": true,
"types": [
{
"type": "conDef",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "identifier",
"type": "semi",
"named": true
},
{
"type": "semi",
"type": "sigDecl",
"named": true
},
{
@@ -146,12 +147,31 @@
"required": true,
"types": [
{
"type": "appExpr",
"type": "lhs",
"named": true
},
{
"type": "typeExpr",
"named": true
},
{
"type": "whereClause",
"named": true
}
]
}
},
{
"type": "deriveDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "identifier",
"named": true
}
]
}
@@ -165,19 +185,7 @@
"required": true,
"types": [
{
"type": "caseAlt",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "start",
"type": "orAlt",
"named": true
},
{
@@ -235,19 +243,7 @@
"required": true,
"types": [
{
"type": "caseAlt",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "start",
"type": "orAlt",
"named": true
},
{
@@ -280,6 +276,21 @@
]
}
},
{
"type": "dollar",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "term",
"named": true
}
]
}
},
{
"type": "forall",
"named": true,
@@ -329,6 +340,41 @@
]
}
},
{
"type": "instanceDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "defDecl",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "sigDecl",
"named": true
},
{
"type": "start",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "lamExpr",
"named": true,
@@ -348,6 +394,60 @@
]
}
},
{
"type": "lhs",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "character",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "number",
"named": true
},
{
"type": "recUpdate",
"named": true
},
{
"type": "string",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "mixfixDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "name",
"named": true
},
{
"type": "number",
"named": true
}
]
}
},
{
"type": "module",
"named": true,
@@ -356,6 +456,10 @@
"multiple": true,
"required": true,
"types": [
{
"type": "classDecl",
"named": true
},
{
"type": "dataDecl",
"named": true
@@ -364,6 +468,10 @@
"type": "defDecl",
"named": true
},
{
"type": "deriveDecl",
"named": true
},
{
"type": "identifier",
"named": true
@@ -372,10 +480,30 @@
"type": "importDef",
"named": true
},
{
"type": "instanceDecl",
"named": true
},
{
"type": "mixfixDecl",
"named": true
},
{
"type": "pfuncDecl",
"named": true
},
{
"type": "ptypeDecl",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "shortDataDecl",
"named": true
},
{
"type": "sigDecl",
"named": true
@@ -383,6 +511,63 @@
]
}
},
{
"type": "orAlt",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "caseAlt",
"named": true
}
]
}
},
{
"type": "pfuncDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "identifier",
"named": true
},
{
"type": "jsLitString",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "ptypeDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "name",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "qname",
"named": true,
@@ -398,6 +583,60 @@
]
}
},
{
"type": "recUpdate",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "identifier",
"named": true
},
{
"type": "term",
"named": true
}
]
}
},
{
"type": "shortDataDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "character",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "number",
"named": true
},
{
"type": "recUpdate",
"named": true
},
{
"type": "string",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "sigDecl",
"named": true,
@@ -442,13 +681,21 @@
"required": true,
"types": [
{
"type": "appExpr",
"type": "character",
"named": true
},
{
"type": "doBlock",
"named": true
},
{
"type": "dollar",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "ifThen",
"named": true
@@ -456,6 +703,22 @@
{
"type": "lamExpr",
"named": true
},
{
"type": "number",
"named": true
},
{
"type": "recUpdate",
"named": true
},
{
"type": "string",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
@@ -465,7 +728,7 @@
"named": true,
"fields": {},
"children": {
"multiple": false,
"multiple": true,
"required": true,
"types": [
{
@@ -479,20 +742,40 @@
{
"type": "term",
"named": true
},
{
"type": "typeExpr",
"named": true
}
]
}
},
{
"type": "varname",
"type": "whereClause",
"named": true,
"fields": {},
"children": {
"multiple": false,
"multiple": true,
"required": true,
"types": [
{
"type": "identifier",
"type": "defDecl",
"named": true
},
{
"type": "end",
"named": true
},
{
"type": "semi",
"named": true
},
{
"type": "sigDecl",
"named": true
},
{
"type": "start",
"named": true
}
]
@@ -502,6 +785,10 @@
"type": "$",
"named": false
},
{
"type": "$=",
"named": false
},
{
"type": "(",
"named": false
@@ -522,6 +809,14 @@
"type": ":",
"named": false
},
{
"type": ":=",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "<-",
"named": false
@@ -534,19 +829,47 @@
"type": "=>",
"named": false
},
{
"type": "[",
"named": false
},
{
"type": "\\",
"named": false
},
{
"type": "]",
"named": false
},
{
"type": "character",
"named": true
},
{
"type": "class",
"named": false
},
{
"type": "className",
"named": true
},
{
"type": "comment",
"named": true,
"extra": true
},
{
"type": "conName",
"named": false
},
{
"type": "data",
"named": false
},
{
"type": "derive",
"named": false
},
{
"type": "do",
"named": false
@@ -571,6 +894,22 @@
"type": "import",
"named": false
},
{
"type": "infixl",
"named": false
},
{
"type": "infixr",
"named": false
},
{
"type": "instance",
"named": false
},
{
"type": "jsLitString",
"named": true
},
{
"type": "let",
"named": false
@@ -580,9 +919,29 @@
"named": false
},
{
"type": "operator",
"type": "name",
"named": true
},
{
"type": "name",
"named": false
},
{
"type": "number",
"named": true
},
{
"type": "pfunc",
"named": false
},
{
"type": "ptype",
"named": false
},
{
"type": "quantity",
"named": false
},
{
"type": "semi",
"named": true
@@ -592,13 +951,21 @@
"named": true
},
{
"type": "strLit",
"type": "string",
"named": true
},
{
"type": "then",
"named": false
},
{
"type": "typeName",
"named": false
},
{
"type": "uses",
"named": false
},
{
"type": "where",
"named": false

33765
src/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@@ -82,7 +82,9 @@ bool tree_sitter_newt_external_scanner_scan(State *state, TSLexer *lexer,
int32_t cur = peek(state);
uint32_t col = lexer->get_column(lexer);
if (ws && syms[VIRT_START]) {
// START must indent more
// We have `ws` so we make forward progress
if (ws && syms[VIRT_START] && cur < col) {
fprintf(stderr, "start [%d %d %d %d] %d %d\n", syms[0], syms[1], syms[2],
syms[3], col, cur);
push(state, col);
@@ -90,7 +92,10 @@ bool tree_sitter_newt_external_scanner_scan(State *state, TSLexer *lexer,
return true;
}
// if we are in a smaller column, we force virt_end
if (syms[VIRT_END]) {
// even if it's not expected (I think this is important)
// on the editor side there is a `then` expected vs outdented `then`, but
// maybe GLR can detect a "stray" END token?
if (syms[VIRT_END] || true) {
if (col < cur) {
fprintf(stderr, "end [%d %d %d %d] %d %d\n", syms[0], syms[1], syms[2],