Add support for nested comments
Unfortunately, the 'extras' array expects single tokens only, so a recursive parsing rule cannot be used to express nested comments; the comment token is produced by an external scanner instead.
This commit is contained in:
parent 75bb2c7009
commit 50a0eaa071
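
(Editor's illustration, not part of the commit.) A single regular-expression token, like the one removed from grammar.js below, cannot keep track of how many "/*" openers are still unclosed, so it stops at the first "*/". Matching a nested comment needs a counter, which is what the new external scanner does. A minimal standalone sketch of that idea, with a hypothetical helper name (is_balanced_comment) and no dependency on the tree-sitter API:

#include <stdbool.h>
#include <stdio.h>

// Returns true if s starts with a comment whose "/*" and "*/" pairs balance.
static bool is_balanced_comment(const char *s) {
  if (s[0] != '/' || s[1] != '*') {
    return false;
  }
  unsigned long level = 1;
  for (s += 2; *s != '\0' && level > 0; ) {
    if (s[0] == '/' && s[1] == '*') {
      ++level;   // another opener: one level deeper
      s += 2;
    } else if (s[0] == '*' && s[1] == '/') {
      --level;   // a closer: one level shallower
      s += 2;
    } else {
      ++s;
    }
  }
  return level == 0;
}

int main(void) {
  // The old regex token would stop at the first "*/" and leave the tail
  // "still outer */" to be parsed as code; the counter accepts the whole span.
  printf("%d\n", is_balanced_comment("/* outer /* inner */ still outer */")); // 1
  printf("%d\n", is_balanced_comment("/* never closed /* inner */"));         // 0
  return 0;
}
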
grammar.js (20 changed lines)
@@ -25,6 +25,11 @@ module.exports = grammar({
     [$._lvalue, $.array_expression],
   ],

+  externals: ($) => [
+    // Nested comments need to be tokenized externally
+    $.comment,
+  ],
+
   extras: ($) => [
     /( |\n|\r|\t)+/,
     $.comment,
@@ -36,21 +41,6 @@ module.exports = grammar({
       optional($._declaration_chunks),
     ),

-    comment: ($) => token(
-      seq(
-        "/*",
-        repeat(
-          choice(
-            // Match anything but the end-delimiter
-            /(\*[^/]|[^*])+/,
-            // Comments can be nested
-            // $.comment,
-          ),
-        ),
-        "*/",
-      ),
-    ),
-
     // Expressions {{{

     _expr: ($) => choice(
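(Editor's note, not part of the commit.) tree-sitter hands external tokens to the scanner by position: the enum TokenType in src/scanner.c below must list them in the same order as the externals array above, and valid_symbols is indexed by those enum values. The src/grammar.json and src/parser.c changes that follow are generated from this grammar.js edit by tree-sitter generate. A hypothetical sketch of the C side if a second external token were ever added (STRING does not exist in this grammar):

// Order must mirror grammar.js, e.g. externals: ($) => [$.comment, $.string]
enum TokenType {
  COMMENT, // externals[0] -> $.comment (the only real external token here)
  STRING,  // externals[1] -> $.string (hypothetical)
};
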
src/grammar.json

@@ -23,34 +23,6 @@
         }
       ]
     },
-    "comment": {
-      "type": "TOKEN",
-      "content": {
-        "type": "SEQ",
-        "members": [
-          {
-            "type": "STRING",
-            "value": "/*"
-          },
-          {
-            "type": "REPEAT",
-            "content": {
-              "type": "CHOICE",
-              "members": [
-                {
-                  "type": "PATTERN",
-                  "value": "(\\*[^/]|[^*])+"
-                }
-              ]
-            }
-          },
-          {
-            "type": "STRING",
-            "value": "*/"
-          }
-        ]
-      }
-    },
     "_expr": {
       "type": "CHOICE",
       "members": [
@@ -1468,7 +1440,12 @@
     ]
   ],
   "precedences": [],
-  "externals": [],
+  "externals": [
+    {
+      "type": "SYMBOL",
+      "name": "comment"
+    }
+  ],
   "inline": [],
   "supertypes": []
 }
src/parser.c (3172 changed lines)
File diff suppressed because it is too large
src/scanner.c (new file, 98 lines)

@@ -0,0 +1,98 @@
#include <stdint.h>
#include <string.h>

#include <tree_sitter/parser.h>

enum TokenType {
  COMMENT,
};

static int32_t advance(TSLexer *lexer) {
  int32_t last = lexer->lookahead;
  lexer->advance(lexer, false);
  return last;
}

static bool expect(TSLexer *lexer, int32_t expected) {
  if (lexer->eof(lexer) || lexer->lookahead != expected) {
    return false;
  }
  advance(lexer);
  return true;
}

static void skip_whitespace(TSLexer *lexer) {
  while (!lexer->eof(lexer)) {
    switch (lexer->lookahead) {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        lexer->advance(lexer, true);
        break; // keep skipping; falling through to default would stop after one character
      default:
        return;
    }
  }
}

// Comments start with "/*", end with "*/", and can be nested like OCaml
static bool scan_comment(TSLexer *lexer) {
  // '/' already consumed outside of the function
  if (!expect(lexer, '*')) {
    return false; // Division etc...
  }

  unsigned long level = 1;

  while (level > 0 && !lexer->eof(lexer)) {
    switch (advance(lexer)) {
      case '/':
        if (expect(lexer, '*')) {
          ++level;
        }
        break;
      case '*':
        if (expect(lexer, '/')) {
          --level;
        }
        break;
    }
  }

  return level == 0;
}

// The scanner keeps no state between calls, so the lifecycle and
// (de)serialization hooks are all no-ops.
void *tree_sitter_tiger_external_scanner_create() {
  return NULL;
}

void tree_sitter_tiger_external_scanner_destroy(void *payload) {}

unsigned tree_sitter_tiger_external_scanner_serialize(void *payload,
                                                      char *buffer) {
  return 0;
}

void tree_sitter_tiger_external_scanner_deserialize(void *payload,
                                                    char const *buffer,
                                                    unsigned length) {}

bool tree_sitter_tiger_external_scanner_scan(void *payload,
                                             TSLexer *lexer,
                                             bool const *valid_symbols) {
  // Only try to scan when appropriate
  if (!valid_symbols[COMMENT]) {
    return false;
  }

  // Apparently it is expected of us to skip all whitespace by ourselves...
  skip_whitespace(lexer);

  // Comments start with "/*", scan_comment expects '/' to have been consumed
  if (expect(lexer, '/')) {
    lexer->result_symbol = COMMENT;
    return scan_comment(lexer);
  }

  return false;
}
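
(Editor's sketch, not part of the commit.) One way to exercise the scanner in isolation is to fake only the TSLexer callbacks it touches (lookahead, advance, eof, result_symbol) over an in-memory string; the field names come from tree_sitter/parser.h, while FakeLexer and the fake_* helpers are hypothetical. Built next to src/scanner.c with the tree_sitter include directory on the path, this should run with both assertions passing:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#include <tree_sitter/parser.h>

// The entry point defined in src/scanner.c above.
bool tree_sitter_tiger_external_scanner_scan(void *, TSLexer *, bool const *);

typedef struct {
  TSLexer lexer;     // must be the first member so TSLexer * casts back to FakeLexer *
  char const *input;
  size_t pos;
} FakeLexer;

static void fake_advance(TSLexer *lexer, bool skip) {
  (void)skip; // skipped whitespace vs. token content makes no difference here
  FakeLexer *f = (FakeLexer *)lexer;
  if (f->input[f->pos] != '\0') {
    f->pos += 1;
  }
  f->lexer.lookahead = f->input[f->pos];
}

static bool fake_eof(const TSLexer *lexer) {
  const FakeLexer *f = (const FakeLexer *)lexer;
  return f->input[f->pos] == '\0';
}

int main(void) {
  FakeLexer f;
  memset(&f, 0, sizeof f);
  f.input = "/* outer /* inner */ still outer */ x";
  f.pos = 0;
  f.lexer.lookahead = f.input[0];
  f.lexer.advance = fake_advance;
  f.lexer.eof = fake_eof;

  bool valid[1] = {true}; // COMMENT is the only external token
  bool matched = tree_sitter_tiger_external_scanner_scan(NULL, &f.lexer, valid);

  assert(matched);                 // the whole nested comment is one token
  assert(f.input[f.pos] == ' ');   // scanning stopped right after the outer "*/"
  printf("consumed %zu characters\n", f.pos);
  return 0;
}
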
@@ -51,13 +51,7 @@ Nested comment
 --------------------------------------------------------------------------------

 (source_file
-  (comment)
-  (identifier)
-  (ERROR
-    (operator)
-    (operator)
-    (operator)
-    (operator)))
+  (comment))

 ================================================================================
 Unterminated comment