initial commit
This commit is contained in:
159
src/scanner.c
Normal file
159
src/scanner.c
Normal file
@@ -0,0 +1,159 @@
|
||||
#include "tree_sitter/parser.h"
|
||||
#include "tree_sitter/alloc.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// not available in wasm
|
||||
// lexer->log(...) is documented upstream, but is not in parser.h
|
||||
#define fprintf(...) //
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
uint32_t *data;
|
||||
} State;
|
||||
|
||||
enum TokenType {
|
||||
VIRT_START,
|
||||
VIRT_SEMI,
|
||||
VIRT_END,
|
||||
WHITESPACE,
|
||||
};
|
||||
|
||||
static void ensure(State *state, uint32_t count) {
|
||||
if (state->cap < count) {
|
||||
state->cap = count * 2;
|
||||
uint32_t *new_data = ts_malloc(sizeof(uint32_t) * state->cap);
|
||||
memcpy(new_data, state->data, state->len * sizeof(uint32_t));
|
||||
ts_free(state->data);
|
||||
state->data = new_data;
|
||||
}
|
||||
}
|
||||
|
||||
static void push(State *state, uint32_t col) {
|
||||
// fprintf(stderr, "push %d\n", col);
|
||||
ensure(state, state->len + 1);
|
||||
state->data[state->len++] = col;
|
||||
}
|
||||
|
||||
static uint32_t pop(State *state) {
|
||||
if (state->len) {
|
||||
// fprintf(stderr, "pop %d\n", state->data[state->len-1]);
|
||||
state->len--;
|
||||
return state->data[state->len];
|
||||
}
|
||||
fprintf(stderr, "stack underflow");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t peek(State *state) {
|
||||
return state->len ? state->data[state->len - 1] : -1; // or -1?
|
||||
}
|
||||
|
||||
#define PEEK lexer->lookahead
|
||||
#define PEEK_WS (PEEK == ' ' || PEEK == '\n' || PEEK == '\t')
|
||||
|
||||
/**
|
||||
* The custom scanner is responsible for the virtual indent, outdent, and semi tokens.
|
||||
* Additionally it handles whitespace. This allows us to give the virtual tokens priority over
|
||||
* whitespace. So tree-sitter can only advance over whitespace if there is enough of it or if
|
||||
* it gets a START, SEMI, or END.
|
||||
*/
|
||||
bool tree_sitter_newt_external_scanner_scan(State *state, TSLexer *lexer,
|
||||
const bool *syms) {
|
||||
fprintf(stderr, "scan %d %d %d %d\n", syms[0], syms[1], syms[2], syms[3]);
|
||||
|
||||
// skip whitespace
|
||||
bool ws = false;
|
||||
while (PEEK == ' ' || PEEK == '\n' || PEEK == '\t') {
|
||||
ws = true;
|
||||
lexer->advance(lexer,true);
|
||||
}
|
||||
|
||||
// Might have to deal with comments in here.
|
||||
if (PEEK == '-' || PEEK == '{') {
|
||||
if (syms[WHITESPACE] && ws) {
|
||||
lexer->result_symbol = WHITESPACE;
|
||||
return true;
|
||||
}
|
||||
// comments don't count for START/SEMI/END, let tree-sitter process the comment and get back to us
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t cur = peek(state);
|
||||
uint32_t col = lexer->get_column(lexer);
|
||||
if (ws && syms[VIRT_START]) {
|
||||
fprintf(stderr, "start [%d %d %d %d] %d %d\n", syms[0], syms[1], syms[2],
|
||||
syms[3], col, cur);
|
||||
push(state, col);
|
||||
lexer->result_symbol = VIRT_START;
|
||||
return true;
|
||||
}
|
||||
// if we are in a smaller column, we force virt_end
|
||||
if (syms[VIRT_END]) {
|
||||
|
||||
if (col < cur) {
|
||||
fprintf(stderr, "end [%d %d %d %d] %d %d\n", syms[0], syms[1], syms[2],
|
||||
syms[3], col, cur);
|
||||
pop(state);
|
||||
lexer->result_symbol = VIRT_END;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// but we can't do that for semi?
|
||||
if (syms[VIRT_SEMI]) {
|
||||
// FIXME - not eof, but we are requiring one at end of file at the moment.
|
||||
if (!lexer->eof(lexer) && col == cur) {
|
||||
lexer->result_symbol = VIRT_SEMI;
|
||||
fprintf(stderr, "semi [%d %d %d %d] %d %d\n", syms[0], syms[1], syms[2],
|
||||
syms[3], col, cur);
|
||||
return true;
|
||||
} else {
|
||||
fprintf(stderr, "not semi [%d %d %d %d] %d %d\n", syms[0], syms[1],
|
||||
syms[2], syms[3], col, cur);
|
||||
}
|
||||
}
|
||||
|
||||
if (syms[WHITESPACE] && ws) {
|
||||
fprintf(stderr, "whitespace %d\n", cur);
|
||||
lexer->result_symbol = WHITESPACE;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void *tree_sitter_newt_external_scanner_create() {
|
||||
State *state = calloc(sizeof(State), 1);
|
||||
state->cap = 20;
|
||||
state->data = ts_malloc(sizeof(uint32_t) * state->cap);
|
||||
// put the initial level at 0 and use semi at top level
|
||||
push(state, 0);
|
||||
return state;
|
||||
}
|
||||
|
||||
void tree_sitter_newt_external_scanner_destroy(State *state) {
|
||||
ts_free(state->data);
|
||||
ts_free(state);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_newt_external_scanner_serialize(State *state,
|
||||
char *buffer) {
|
||||
unsigned size = sizeof(state->data[0]) * state->len;
|
||||
if (size > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
|
||||
return 0;
|
||||
}
|
||||
memcpy(buffer, state->data, size);
|
||||
return size;
|
||||
}
|
||||
|
||||
void tree_sitter_newt_external_scanner_deserialize(State *state,
|
||||
char *buffer,
|
||||
unsigned length) {
|
||||
unsigned len = length / sizeof(state->data[0]);
|
||||
if (len > 0) {
|
||||
ensure(state, len);
|
||||
state->len = len;
|
||||
memcpy(state->data, buffer, length);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user