Add simple atoms

This commit is contained in:
Bruno BELANYI 2022-06-01 19:33:42 +02:00
parent 30b2e50367
commit e7ba93870e
5 changed files with 530 additions and 37 deletions

View file

@ -2,8 +2,39 @@ module.exports = grammar({
name: "tiger",
rules: {
// TODO: add the remaining grammar rules (only literal atoms are defined so far)
source_file: $ => 'hello'
// Entry point of the grammar: a source file is a single expression.
// Kept as a one-member choice(), presumably so further top-level
// alternatives can be added later.
source_file: ($) => choice(
$._expr,
),
// Expression alternatives: the keyword "nil", an integer literal, or a
// string literal. The leading underscore keeps this rule out of the
// resulting syntax tree (its alternatives appear directly under the parent).
_expr: ($) => choice(
"nil",
$.integer_literal,
$.string_literal,
),
// One or more ASCII decimal digits; no sign or radix prefix is matched here.
integer_literal: (_) => /[0-9]+/,
// Double-quoted string. Between the quotes: zero or more pieces, each being
// either an escape_sequence node or a run of characters that are neither a
// double quote nor a backslash.
string_literal: ($) => seq(
'"',
repeat(choice($.escape_sequence, /[^"\\]+/)),
'"',
),
// A backslash followed by exactly one of the escape forms below, lexed as a
// single token. token.immediate() means the token is only recognized when it
// directly follows the previous token, with no skipped whitespace in between.
escape_sequence: (_) => token.immediate(
seq(
"\\",
choice(
// Single-letter control-character escapes: \a \b \f \n \r \t \v
choice("a", "b", "f", "n", "r", "t", "v"),
// Octal: exactly three digits, first digit 0-3, i.e. \000 through \377
/[0-3][0-7]{2}/,
// Hexadecimal: 'x' followed by exactly two hex digits (either case)
seq("x", /[0-9a-fA-F]{2}/),
// Escaped literal characters: backslash and double quote
choice("\\", '"'),
)
)
),
}
});

View file

@ -2,8 +2,143 @@
"name": "tiger",
"rules": {
"source_file": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_expr"
}
]
},
"_expr": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "hello"
"value": "nil"
},
{
"type": "SYMBOL",
"name": "integer_literal"
},
{
"type": "SYMBOL",
"name": "string_literal"
}
]
},
"integer_literal": {
"type": "PATTERN",
"value": "[0-9]+"
},
"string_literal": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "PATTERN",
"value": "[^\"\\\\]+"
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
},
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "a"
},
{
"type": "STRING",
"value": "b"
},
{
"type": "STRING",
"value": "f"
},
{
"type": "STRING",
"value": "n"
},
{
"type": "STRING",
"value": "r"
},
{
"type": "STRING",
"value": "t"
},
{
"type": "STRING",
"value": "v"
}
]
},
{
"type": "PATTERN",
"value": "[0-3][0-7]{2}"
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "x"
},
{
"type": "PATTERN",
"value": "[0-9a-fA-F]{2}"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "STRING",
"value": "\""
}
]
}
]
}
]
}
}
},
"extras": [

View file

@ -2,10 +2,51 @@
{
"type": "source_file",
"named": true,
"fields": {}
"fields": {},
"children": {
"multiple": false,
"required": false,
"types": [
{
"type": "integer_literal",
"named": true
},
{
"type": "hello",
"type": "string_literal",
"named": true
}
]
}
},
{
"type": "string_literal",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "integer_literal",
"named": true
},
{
"type": "nil",
"named": false
}
]

View file

@ -6,31 +6,52 @@
#endif
#define LANGUAGE_VERSION 13
#define STATE_COUNT 4
#define STATE_COUNT 9
#define LARGE_STATE_COUNT 2
#define SYMBOL_COUNT 3
#define SYMBOL_COUNT 10
#define ALIAS_COUNT 0
#define TOKEN_COUNT 2
#define TOKEN_COUNT 6
#define EXTERNAL_TOKEN_COUNT 0
#define FIELD_COUNT 0
#define MAX_ALIAS_SEQUENCE_LENGTH 1
#define MAX_ALIAS_SEQUENCE_LENGTH 3
#define PRODUCTION_ID_COUNT 1
enum {
anon_sym_hello = 1,
sym_source_file = 2,
anon_sym_nil = 1,
sym_integer_literal = 2,
anon_sym_DQUOTE = 3,
aux_sym_string_literal_token1 = 4,
sym_escape_sequence = 5,
sym_source_file = 6,
sym__expr = 7,
sym_string_literal = 8,
aux_sym_string_literal_repeat1 = 9,
};
static const char * const ts_symbol_names[] = {
[ts_builtin_sym_end] = "end",
[anon_sym_hello] = "hello",
[anon_sym_nil] = "nil",
[sym_integer_literal] = "integer_literal",
[anon_sym_DQUOTE] = "\"",
[aux_sym_string_literal_token1] = "string_literal_token1",
[sym_escape_sequence] = "escape_sequence",
[sym_source_file] = "source_file",
[sym__expr] = "_expr",
[sym_string_literal] = "string_literal",
[aux_sym_string_literal_repeat1] = "string_literal_repeat1",
};
static const TSSymbol ts_symbol_map[] = {
[ts_builtin_sym_end] = ts_builtin_sym_end,
[anon_sym_hello] = anon_sym_hello,
[anon_sym_nil] = anon_sym_nil,
[sym_integer_literal] = sym_integer_literal,
[anon_sym_DQUOTE] = anon_sym_DQUOTE,
[aux_sym_string_literal_token1] = aux_sym_string_literal_token1,
[sym_escape_sequence] = sym_escape_sequence,
[sym_source_file] = sym_source_file,
[sym__expr] = sym__expr,
[sym_string_literal] = sym_string_literal,
[aux_sym_string_literal_repeat1] = aux_sym_string_literal_repeat1,
};
static const TSSymbolMetadata ts_symbol_metadata[] = {
@ -38,14 +59,42 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = false,
.named = true,
},
[anon_sym_hello] = {
[anon_sym_nil] = {
.visible = true,
.named = false,
},
[sym_integer_literal] = {
.visible = true,
.named = true,
},
[anon_sym_DQUOTE] = {
.visible = true,
.named = false,
},
[aux_sym_string_literal_token1] = {
.visible = false,
.named = false,
},
[sym_escape_sequence] = {
.visible = true,
.named = true,
},
[sym_source_file] = {
.visible = true,
.named = true,
},
[sym__expr] = {
.visible = false,
.named = true,
},
[sym_string_literal] = {
.visible = true,
.named = true,
},
[aux_sym_string_literal_repeat1] = {
.visible = false,
.named = false,
},
};
static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
@ -61,30 +110,101 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
eof = lexer->eof(lexer);
switch (state) {
case 0:
if (eof) ADVANCE(5);
if (lookahead == 'h') ADVANCE(1);
if (eof) ADVANCE(10);
if (lookahead == '"') ADVANCE(13);
if (lookahead == '\\') ADVANCE(6);
if (lookahead == 'n') ADVANCE(2);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(0)
lookahead == ' ') SKIP(9)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE();
case 1:
if (lookahead == 'e') ADVANCE(3);
if (lookahead == '"') ADVANCE(13);
if (lookahead == '\\') ADVANCE(6);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(14);
if (lookahead != 0) ADVANCE(15);
END_STATE();
case 2:
if (lookahead == 'l') ADVANCE(4);
if (lookahead == 'i') ADVANCE(3);
END_STATE();
case 3:
if (lookahead == 'l') ADVANCE(2);
if (lookahead == 'l') ADVANCE(11);
END_STATE();
case 4:
if (lookahead == 'o') ADVANCE(6);
if (('0' <= lookahead && lookahead <= '7')) ADVANCE(16);
END_STATE();
case 5:
ACCEPT_TOKEN(ts_builtin_sym_end);
if (('0' <= lookahead && lookahead <= '7')) ADVANCE(4);
END_STATE();
case 6:
ACCEPT_TOKEN(anon_sym_hello);
if (lookahead == '"' ||
lookahead == '\\' ||
lookahead == 'a' ||
lookahead == 'b' ||
lookahead == 'f' ||
lookahead == 'n' ||
lookahead == 'r' ||
lookahead == 't' ||
lookahead == 'v') ADVANCE(16);
if (lookahead == 'x') ADVANCE(8);
if (('0' <= lookahead && lookahead <= '3')) ADVANCE(5);
END_STATE();
case 7:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'F') ||
('a' <= lookahead && lookahead <= 'f')) ADVANCE(16);
END_STATE();
case 8:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'F') ||
('a' <= lookahead && lookahead <= 'f')) ADVANCE(7);
END_STATE();
case 9:
if (eof) ADVANCE(10);
if (lookahead == '"') ADVANCE(13);
if (lookahead == 'n') ADVANCE(2);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(9)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE();
case 10:
ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE();
case 11:
ACCEPT_TOKEN(anon_sym_nil);
END_STATE();
case 12:
ACCEPT_TOKEN(sym_integer_literal);
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE();
case 13:
ACCEPT_TOKEN(anon_sym_DQUOTE);
END_STATE();
case 14:
ACCEPT_TOKEN(aux_sym_string_literal_token1);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(14);
if (lookahead != 0 &&
lookahead != '"' &&
lookahead != '\\') ADVANCE(15);
END_STATE();
case 15:
ACCEPT_TOKEN(aux_sym_string_literal_token1);
if (lookahead != 0 &&
lookahead != '"' &&
lookahead != '\\') ADVANCE(15);
END_STATE();
case 16:
ACCEPT_TOKEN(sym_escape_sequence);
END_STATE();
default:
return false;
@ -94,41 +214,97 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[0] = {.lex_state = 0},
[1] = {.lex_state = 0},
[2] = {.lex_state = 0},
[3] = {.lex_state = 0},
[2] = {.lex_state = 1},
[3] = {.lex_state = 1},
[4] = {.lex_state = 1},
[5] = {.lex_state = 0},
[6] = {.lex_state = 0},
[7] = {.lex_state = 0},
[8] = {.lex_state = 0},
};
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[0] = {
[ts_builtin_sym_end] = ACTIONS(1),
[anon_sym_hello] = ACTIONS(1),
[anon_sym_nil] = ACTIONS(1),
[sym_integer_literal] = ACTIONS(1),
[anon_sym_DQUOTE] = ACTIONS(1),
[sym_escape_sequence] = ACTIONS(1),
},
[1] = {
[sym_source_file] = STATE(3),
[anon_sym_hello] = ACTIONS(3),
[sym_source_file] = STATE(5),
[sym__expr] = STATE(6),
[sym_string_literal] = STATE(6),
[anon_sym_nil] = ACTIONS(3),
[sym_integer_literal] = ACTIONS(3),
[anon_sym_DQUOTE] = ACTIONS(5),
},
};
static const uint16_t ts_small_parse_table[] = {
[0] = 1,
ACTIONS(5), 1,
ts_builtin_sym_end,
[4] = 1,
[0] = 3,
ACTIONS(7), 1,
anon_sym_DQUOTE,
STATE(3), 1,
aux_sym_string_literal_repeat1,
ACTIONS(9), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[11] = 3,
ACTIONS(11), 1,
anon_sym_DQUOTE,
STATE(4), 1,
aux_sym_string_literal_repeat1,
ACTIONS(13), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[22] = 3,
ACTIONS(15), 1,
anon_sym_DQUOTE,
STATE(4), 1,
aux_sym_string_literal_repeat1,
ACTIONS(17), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[33] = 1,
ACTIONS(20), 1,
ts_builtin_sym_end,
[37] = 1,
ACTIONS(22), 1,
ts_builtin_sym_end,
[41] = 1,
ACTIONS(24), 1,
ts_builtin_sym_end,
[45] = 1,
ACTIONS(26), 1,
ts_builtin_sym_end,
};
static const uint32_t ts_small_parse_table_map[] = {
[SMALL_STATE(2)] = 0,
[SMALL_STATE(3)] = 4,
[SMALL_STATE(3)] = 11,
[SMALL_STATE(4)] = 22,
[SMALL_STATE(5)] = 33,
[SMALL_STATE(6)] = 37,
[SMALL_STATE(7)] = 41,
[SMALL_STATE(8)] = 45,
};
static const TSParseActionEntry ts_parse_actions[] = {
[0] = {.entry = {.count = 0, .reusable = false}},
[1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
[3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6),
[5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7),
[9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(8),
[13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[15] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_string_literal_repeat1, 2),
[17] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_string_literal_repeat1, 2), SHIFT_REPEAT(4),
[20] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[22] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[24] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 2),
[26] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 3),
};
#ifdef __cplusplus

110
test/corpus/literals.txt Normal file
View file

@ -0,0 +1,110 @@
================================================================================
Integer literal
================================================================================
42
--------------------------------------------------------------------------------
(source_file
(integer_literal))
================================================================================
String literal
================================================================================
"Hello World!"
--------------------------------------------------------------------------------
(source_file
(string_literal))
================================================================================
String literal special character escapes
================================================================================
"Hello\nWorld!"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal octal
================================================================================
"Hello World\041"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal hex
================================================================================
"Hello World\x21"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal character escapes
================================================================================
"Hello\\\"World\""
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)
(escape_sequence)
(escape_sequence)))
================================================================================
Unterminated string literal
================================================================================
"
--------------------------------------------------------------------------------
(source_file
(string_literal
(MISSING """)))
================================================================================
String literal unterminated escape
================================================================================
"\"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)
(MISSING """)))
================================================================================
String literal invalid octal
================================================================================
"\399"
--------------------------------------------------------------------------------
(source_file
(ERROR
(UNEXPECTED '9'))
(integer_literal)
(ERROR))