Add simple atoms

This commit is contained in:
Bruno BELANYI 2022-06-01 19:33:42 +02:00
parent 30b2e50367
commit e7ba93870e
5 changed files with 530 additions and 37 deletions

View file

@ -2,8 +2,39 @@ module.exports = grammar({
name: "tiger", name: "tiger",
rules: { rules: {
// TODO: add the actual grammar rules source_file: ($) => choice(
source_file: $ => 'hello' $._expr,
),
_expr: ($) => choice(
"nil",
$.integer_literal,
$.string_literal,
),
integer_literal: (_) => /[0-9]+/,
string_literal: ($) => seq(
'"',
repeat(choice($.escape_sequence, /[^"\\]+/)),
'"',
),
escape_sequence: (_) => token.immediate(
seq(
"\\",
choice(
// Special escapes
choice("a", "b", "f", "n", "r", "t", "v"),
// Octal
/[0-3][0-7]{2}/,
// Hexadecimal
seq("x", /[0-9a-fA-F]{2}/),
// Escaped characters
choice("\\", '"'),
)
)
),
} }
}); });

View file

@ -2,8 +2,143 @@
"name": "tiger", "name": "tiger",
"rules": { "rules": {
"source_file": { "source_file": {
"type": "STRING", "type": "CHOICE",
"value": "hello" "members": [
{
"type": "SYMBOL",
"name": "_expr"
}
]
},
"_expr": {
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "nil"
},
{
"type": "SYMBOL",
"name": "integer_literal"
},
{
"type": "SYMBOL",
"name": "string_literal"
}
]
},
"integer_literal": {
"type": "PATTERN",
"value": "[0-9]+"
},
"string_literal": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "PATTERN",
"value": "[^\"\\\\]+"
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
},
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "a"
},
{
"type": "STRING",
"value": "b"
},
{
"type": "STRING",
"value": "f"
},
{
"type": "STRING",
"value": "n"
},
{
"type": "STRING",
"value": "r"
},
{
"type": "STRING",
"value": "t"
},
{
"type": "STRING",
"value": "v"
}
]
},
{
"type": "PATTERN",
"value": "[0-3][0-7]{2}"
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "x"
},
{
"type": "PATTERN",
"value": "[0-9a-fA-F]{2}"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "STRING",
"value": "\""
}
]
}
]
}
]
}
} }
}, },
"extras": [ "extras": [

View file

@ -2,10 +2,51 @@
{ {
"type": "source_file", "type": "source_file",
"named": true, "named": true,
"fields": {} "fields": {},
"children": {
"multiple": false,
"required": false,
"types": [
{
"type": "integer_literal",
"named": true
},
{
"type": "string_literal",
"named": true
}
]
}
}, },
{ {
"type": "hello", "type": "string_literal",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "integer_literal",
"named": true
},
{
"type": "nil",
"named": false "named": false
} }
] ]

View file

@ -6,31 +6,52 @@
#endif #endif
#define LANGUAGE_VERSION 13 #define LANGUAGE_VERSION 13
#define STATE_COUNT 4 #define STATE_COUNT 9
#define LARGE_STATE_COUNT 2 #define LARGE_STATE_COUNT 2
#define SYMBOL_COUNT 3 #define SYMBOL_COUNT 10
#define ALIAS_COUNT 0 #define ALIAS_COUNT 0
#define TOKEN_COUNT 2 #define TOKEN_COUNT 6
#define EXTERNAL_TOKEN_COUNT 0 #define EXTERNAL_TOKEN_COUNT 0
#define FIELD_COUNT 0 #define FIELD_COUNT 0
#define MAX_ALIAS_SEQUENCE_LENGTH 1 #define MAX_ALIAS_SEQUENCE_LENGTH 3
#define PRODUCTION_ID_COUNT 1 #define PRODUCTION_ID_COUNT 1
enum { enum {
anon_sym_hello = 1, anon_sym_nil = 1,
sym_source_file = 2, sym_integer_literal = 2,
anon_sym_DQUOTE = 3,
aux_sym_string_literal_token1 = 4,
sym_escape_sequence = 5,
sym_source_file = 6,
sym__expr = 7,
sym_string_literal = 8,
aux_sym_string_literal_repeat1 = 9,
}; };
static const char * const ts_symbol_names[] = { static const char * const ts_symbol_names[] = {
[ts_builtin_sym_end] = "end", [ts_builtin_sym_end] = "end",
[anon_sym_hello] = "hello", [anon_sym_nil] = "nil",
[sym_integer_literal] = "integer_literal",
[anon_sym_DQUOTE] = "\"",
[aux_sym_string_literal_token1] = "string_literal_token1",
[sym_escape_sequence] = "escape_sequence",
[sym_source_file] = "source_file", [sym_source_file] = "source_file",
[sym__expr] = "_expr",
[sym_string_literal] = "string_literal",
[aux_sym_string_literal_repeat1] = "string_literal_repeat1",
}; };
static const TSSymbol ts_symbol_map[] = { static const TSSymbol ts_symbol_map[] = {
[ts_builtin_sym_end] = ts_builtin_sym_end, [ts_builtin_sym_end] = ts_builtin_sym_end,
[anon_sym_hello] = anon_sym_hello, [anon_sym_nil] = anon_sym_nil,
[sym_integer_literal] = sym_integer_literal,
[anon_sym_DQUOTE] = anon_sym_DQUOTE,
[aux_sym_string_literal_token1] = aux_sym_string_literal_token1,
[sym_escape_sequence] = sym_escape_sequence,
[sym_source_file] = sym_source_file, [sym_source_file] = sym_source_file,
[sym__expr] = sym__expr,
[sym_string_literal] = sym_string_literal,
[aux_sym_string_literal_repeat1] = aux_sym_string_literal_repeat1,
}; };
static const TSSymbolMetadata ts_symbol_metadata[] = { static const TSSymbolMetadata ts_symbol_metadata[] = {
@ -38,14 +59,42 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = false, .visible = false,
.named = true, .named = true,
}, },
[anon_sym_hello] = { [anon_sym_nil] = {
.visible = true, .visible = true,
.named = false, .named = false,
}, },
[sym_integer_literal] = {
.visible = true,
.named = true,
},
[anon_sym_DQUOTE] = {
.visible = true,
.named = false,
},
[aux_sym_string_literal_token1] = {
.visible = false,
.named = false,
},
[sym_escape_sequence] = {
.visible = true,
.named = true,
},
[sym_source_file] = { [sym_source_file] = {
.visible = true, .visible = true,
.named = true, .named = true,
}, },
[sym__expr] = {
.visible = false,
.named = true,
},
[sym_string_literal] = {
.visible = true,
.named = true,
},
[aux_sym_string_literal_repeat1] = {
.visible = false,
.named = false,
},
}; };
static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
@ -61,30 +110,101 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
eof = lexer->eof(lexer); eof = lexer->eof(lexer);
switch (state) { switch (state) {
case 0: case 0:
if (eof) ADVANCE(5); if (eof) ADVANCE(10);
if (lookahead == 'h') ADVANCE(1); if (lookahead == '"') ADVANCE(13);
if (lookahead == '\\') ADVANCE(6);
if (lookahead == 'n') ADVANCE(2);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\n' || lookahead == '\n' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') SKIP(0) lookahead == ' ') SKIP(9)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE(); END_STATE();
case 1: case 1:
if (lookahead == 'e') ADVANCE(3); if (lookahead == '"') ADVANCE(13);
if (lookahead == '\\') ADVANCE(6);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(14);
if (lookahead != 0) ADVANCE(15);
END_STATE(); END_STATE();
case 2: case 2:
if (lookahead == 'l') ADVANCE(4); if (lookahead == 'i') ADVANCE(3);
END_STATE(); END_STATE();
case 3: case 3:
if (lookahead == 'l') ADVANCE(2); if (lookahead == 'l') ADVANCE(11);
END_STATE(); END_STATE();
case 4: case 4:
if (lookahead == 'o') ADVANCE(6); if (('0' <= lookahead && lookahead <= '7')) ADVANCE(16);
END_STATE(); END_STATE();
case 5: case 5:
ACCEPT_TOKEN(ts_builtin_sym_end); if (('0' <= lookahead && lookahead <= '7')) ADVANCE(4);
END_STATE(); END_STATE();
case 6: case 6:
ACCEPT_TOKEN(anon_sym_hello); if (lookahead == '"' ||
lookahead == '\\' ||
lookahead == 'a' ||
lookahead == 'b' ||
lookahead == 'f' ||
lookahead == 'n' ||
lookahead == 'r' ||
lookahead == 't' ||
lookahead == 'v') ADVANCE(16);
if (lookahead == 'x') ADVANCE(8);
if (('0' <= lookahead && lookahead <= '3')) ADVANCE(5);
END_STATE();
case 7:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'F') ||
('a' <= lookahead && lookahead <= 'f')) ADVANCE(16);
END_STATE();
case 8:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'F') ||
('a' <= lookahead && lookahead <= 'f')) ADVANCE(7);
END_STATE();
case 9:
if (eof) ADVANCE(10);
if (lookahead == '"') ADVANCE(13);
if (lookahead == 'n') ADVANCE(2);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(9)
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE();
case 10:
ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE();
case 11:
ACCEPT_TOKEN(anon_sym_nil);
END_STATE();
case 12:
ACCEPT_TOKEN(sym_integer_literal);
if (('0' <= lookahead && lookahead <= '9')) ADVANCE(12);
END_STATE();
case 13:
ACCEPT_TOKEN(anon_sym_DQUOTE);
END_STATE();
case 14:
ACCEPT_TOKEN(aux_sym_string_literal_token1);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(14);
if (lookahead != 0 &&
lookahead != '"' &&
lookahead != '\\') ADVANCE(15);
END_STATE();
case 15:
ACCEPT_TOKEN(aux_sym_string_literal_token1);
if (lookahead != 0 &&
lookahead != '"' &&
lookahead != '\\') ADVANCE(15);
END_STATE();
case 16:
ACCEPT_TOKEN(sym_escape_sequence);
END_STATE(); END_STATE();
default: default:
return false; return false;
@ -94,41 +214,97 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
static const TSLexMode ts_lex_modes[STATE_COUNT] = { static const TSLexMode ts_lex_modes[STATE_COUNT] = {
[0] = {.lex_state = 0}, [0] = {.lex_state = 0},
[1] = {.lex_state = 0}, [1] = {.lex_state = 0},
[2] = {.lex_state = 0}, [2] = {.lex_state = 1},
[3] = {.lex_state = 0}, [3] = {.lex_state = 1},
[4] = {.lex_state = 1},
[5] = {.lex_state = 0},
[6] = {.lex_state = 0},
[7] = {.lex_state = 0},
[8] = {.lex_state = 0},
}; };
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[0] = { [0] = {
[ts_builtin_sym_end] = ACTIONS(1), [ts_builtin_sym_end] = ACTIONS(1),
[anon_sym_hello] = ACTIONS(1), [anon_sym_nil] = ACTIONS(1),
[sym_integer_literal] = ACTIONS(1),
[anon_sym_DQUOTE] = ACTIONS(1),
[sym_escape_sequence] = ACTIONS(1),
}, },
[1] = { [1] = {
[sym_source_file] = STATE(3), [sym_source_file] = STATE(5),
[anon_sym_hello] = ACTIONS(3), [sym__expr] = STATE(6),
[sym_string_literal] = STATE(6),
[anon_sym_nil] = ACTIONS(3),
[sym_integer_literal] = ACTIONS(3),
[anon_sym_DQUOTE] = ACTIONS(5),
}, },
}; };
static const uint16_t ts_small_parse_table[] = { static const uint16_t ts_small_parse_table[] = {
[0] = 1, [0] = 3,
ACTIONS(5), 1,
ts_builtin_sym_end,
[4] = 1,
ACTIONS(7), 1, ACTIONS(7), 1,
anon_sym_DQUOTE,
STATE(3), 1,
aux_sym_string_literal_repeat1,
ACTIONS(9), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[11] = 3,
ACTIONS(11), 1,
anon_sym_DQUOTE,
STATE(4), 1,
aux_sym_string_literal_repeat1,
ACTIONS(13), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[22] = 3,
ACTIONS(15), 1,
anon_sym_DQUOTE,
STATE(4), 1,
aux_sym_string_literal_repeat1,
ACTIONS(17), 2,
aux_sym_string_literal_token1,
sym_escape_sequence,
[33] = 1,
ACTIONS(20), 1,
ts_builtin_sym_end,
[37] = 1,
ACTIONS(22), 1,
ts_builtin_sym_end,
[41] = 1,
ACTIONS(24), 1,
ts_builtin_sym_end,
[45] = 1,
ACTIONS(26), 1,
ts_builtin_sym_end, ts_builtin_sym_end,
}; };
static const uint32_t ts_small_parse_table_map[] = { static const uint32_t ts_small_parse_table_map[] = {
[SMALL_STATE(2)] = 0, [SMALL_STATE(2)] = 0,
[SMALL_STATE(3)] = 4, [SMALL_STATE(3)] = 11,
[SMALL_STATE(4)] = 22,
[SMALL_STATE(5)] = 33,
[SMALL_STATE(6)] = 37,
[SMALL_STATE(7)] = 41,
[SMALL_STATE(8)] = 45,
}; };
static const TSParseActionEntry ts_parse_actions[] = { static const TSParseActionEntry ts_parse_actions[] = {
[0] = {.entry = {.count = 0, .reusable = false}}, [0] = {.entry = {.count = 0, .reusable = false}},
[1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
[3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6),
[5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7),
[9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(8),
[13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[15] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_string_literal_repeat1, 2),
[17] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_string_literal_repeat1, 2), SHIFT_REPEAT(4),
[20] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[22] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[24] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 2),
[26] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 3),
}; };
#ifdef __cplusplus #ifdef __cplusplus

110
test/corpus/literals.txt Normal file
View file

@ -0,0 +1,110 @@
================================================================================
Integer literal
================================================================================
42
--------------------------------------------------------------------------------
(source_file
(integer_literal))
================================================================================
String literal
================================================================================
"Hello World!"
--------------------------------------------------------------------------------
(source_file
(string_literal))
================================================================================
String literal special character escapes
================================================================================
"Hello\nWorld!"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal octal
================================================================================
"Hello World\041"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal hex
================================================================================
"Hello World\x21"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)))
================================================================================
String literal character escapes
================================================================================
"Hello\\\"World\""
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)
(escape_sequence)
(escape_sequence)))
================================================================================
Unterminated string literal
================================================================================
"
--------------------------------------------------------------------------------
(source_file
(string_literal
(MISSING """)))
================================================================================
String literal unterminated escape
================================================================================
"\"
--------------------------------------------------------------------------------
(source_file
(string_literal
(escape_sequence)
(MISSING """)))
================================================================================
String literal invalid octal
================================================================================
"\399"
--------------------------------------------------------------------------------
(source_file
(ERROR
(UNEXPECTED '9'))
(integer_literal)
(ERROR))