commit 83c75de773b94f9ac0ddfc90295432e36e08a401
Author: Bruno BELANYI
Date:   Sun Nov 8 16:55:12 2020 +0100

    pratt: add parser

    This is based on C's operator precedence rules

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2c4c7ad
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+CC = gcc
+CPPFLAGS = -Isrc/ -D_POSIX_C_SOURCE=200809L
+VPATH = src/
+CFLAGS = -Wall -Wextra -pedantic -Werror -std=c99
+USE_CLIMBING = 1
+
+SRC = \
+	src/eval.c \
+
+BIN = pratt
+OBJ = $(SRC:.c=.o)
+
+.PHONY: all
+all: $(BIN)
+
+$(BIN):
+$(BIN): $(OBJ) src/pratt.o
+
+.PHONY: clean
+clean:
+	$(RM) $(OBJ) src/pratt.o # remove object files
+	$(RM) $(BIN) # remove main program
diff --git a/src/eval.c b/src/eval.c
new file mode 100644
index 0000000..e3439f7
--- /dev/null
+++ b/src/eval.c
@@ -0,0 +1,295 @@
+#include <ctype.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#define UNREACHABLE() __builtin_unreachable()
+#define ARR_SIZE(Arr) (sizeof(Arr) / sizeof((Arr)[0]))
+#define OP_STRING(...) (const char[]){__VA_ARGS__}
+#define OP_SIZE(...) (sizeof(OP_STRING(__VA_ARGS__)) - 1)
+
+// Handler for a token in prefix position: evaluates into `*res`, advancing
+// `*input`; returns false on a parse error
+typedef bool (nul_f)(const char **input, int *res, int until);
+// Handler for a token that follows the already-evaluated operand `lhs`
+typedef bool (left_f)(int lhs, const char **input, int *res, int until);
+
+// Define the different tokens
+enum token_kind {
+#define OP(Name, ...) Name,
+#include "operators.inc"
+};
+
+struct token {
+    enum token_kind kind;
+    int val; // Used for NUMBER
+};
+
+// Forward declare functions
+#define NulFunc(Name) \
+    static nul_f eval_ ## Name ## _nul;
+#define LeftFunc(Name) \
+    static left_f eval_ ## Name ## _left;
+
+#define PREFIX_POSTFIX(Name) \
+    NulFunc(Name) \
+    LeftFunc(Name)
+#define PREFIX(Name) \
+    NulFunc(Name)
+#define PREFIX_INFIX(Name) \
+    NulFunc(Name) \
+    LeftFunc(Name)
+#define INFIX(Name) \
+    LeftFunc(Name)
+#define TERN(Name) \
+    LeftFunc(Name)
+#define PAREN(Name) \
+    NulFunc(Name)
+#define NOT_OP(Name)
+# define OP(Name, NulPrio, LeftPrio, Type, ...) \
+    Type(Name)
+#include "operators.inc"
+
+#undef NulFunc
+#undef LeftFunc
+
+// Symbol table, indexed by token kind
+static const struct {
+    const char *op;
+    const size_t op_len;
+    const int nul_prio;
+    const int left_prio;
+    nul_f *const nul_func;
+    left_f *const left_func;
+} ops[] = {
+#define NulFunc(Name) \
+    eval_ ## Name ## _nul
+#define LeftFunc(Name) \
+    eval_ ## Name ## _left
+
+#define PREFIX_POSTFIX(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NulFunc(Name), LeftFunc(Name), },
+#define PREFIX(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NulFunc(Name), NULL, },
+#define PREFIX_INFIX(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NulFunc(Name), LeftFunc(Name), },
+#define INFIX(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NULL, LeftFunc(Name), },
+#define TERN(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NULL, LeftFunc(Name), },
+#define PAREN(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NulFunc(Name), NULL, },
+#define NOT_OP(Name, NulPrio, LeftPrio, ...) \
+    [Name] = {OP_STRING(__VA_ARGS__), OP_SIZE(__VA_ARGS__), NulPrio, LeftPrio, NULL, NULL, },
+# define OP(Name, NulPrio, LeftPrio, Type, Operator, /* Operator String */ ...) \
+    Type(Name, NulPrio, LeftPrio, __VA_ARGS__)
+#include "operators.inc"
+
+#undef LeftFunc
+#undef NulFunc
+};
+
+// Lexing functions
+
+// Advance `*input` past any leading whitespace
+static void skip_whitespace(const char **input) {
+    // Cast: <ctype.h> functions require a value representable as unsigned char
+    while (**input && isspace((unsigned char)**input))
+        *input += 1; // Skip this character
+}
+
+// Consume the operator `expected` if it is the next token, else return false
+static bool expect(enum token_kind expected, const char **input) {
+    skip_whitespace(input);
+
+    if (strncmp(*input, ops[expected].op, ops[expected].op_len) != 0)
+        return false;
+
+    *input += ops[expected].op_len;
+    return true;
+}
+
+// Read a decimal number at `*input` into `*val` without advancing `*input`.
+// Returns the number of digits read, or 0 if there is no number or if it
+// does not fit in an `int`.
+static size_t my_atoi(const char **input, int *val) {
+    size_t len = 0;
+
+    *val = 0; // Initialize its value
+    while (isdigit((unsigned char)(*input)[len]))
+    {
+        int digit = (*input)[len] - '0';
+        if (*val > (INT_MAX - digit) / 10)
+            return 0; // Would overflow: signed overflow is undefined
+        *val = *val * 10 + digit;
+        len += 1;
+    }
+
+    return len;
+}
+
+// Find the longest operator spelled at `*input` and store its kind in `*op`.
+// Returns its length, or 0 (leaving `*op` untouched) if none matches.
+static size_t lex_operator(const char **input, enum token_kind *op) {
+    size_t best_len = 0;
+
+    for (size_t i = 0; i < ARR_SIZE(ops); ++i)
+    {
+        if (ops[i].op_len <= best_len) // Only look at longer operators
+            continue;
+        if (strncmp(*input, ops[i].op, ops[i].op_len) == 0)
+        {
+            best_len = ops[i].op_len;
+            *op = i;
+        }
+    }
+
+    return best_len;
+}
+
+// Skip whitespace, then lex the next token into `*tok` without consuming it.
+// Returns the token's length, or 0 if there is neither operator nor number.
+static size_t lex_token(const char **input, struct token *tok) {
+    skip_whitespace(input);
+
+    size_t len;
+
+    if ((len = lex_operator(input, &tok->kind)))
+        return len;
+
+    // Assume that it is a number
+    tok->kind = NUMBER;
+    return my_atoi(input, &tok->val);
+}
+
+// Apply to `lhs` every following operator whose left priority is above `prio`.
+// `*res` receives each intermediate result; it is not written if none applies.
+static bool parse_until_left(int lhs, const char **input, int *res, int prio) {
+    struct token tok;
+    size_t len = 0;
+
+    while ((len = lex_token(input, &tok)) &&
+            prio < ops[tok.kind].left_prio) {
+        *input += len;
+        left_f *left_func = ops[tok.kind].left_func;
+        if (!left_func)
+            return false; // Error: not an infix or postfix operator
+        if (!left_func(lhs, input, res, prio))
+            return false; // Error: could not parse right-hand-side
+        lhs = *res;
+    }
+
+    return true;
+}
+
+// Parse one operand (number or prefix expression) into `*res`, then fold in
+// the operators that bind tighter than `prio`. Returns false on parse error.
+static bool parse_until(const char **input, int *res, int prio) {
+    struct token tok;
+    size_t len = 0;
+
+    // Parse left token
+    if (!(len = lex_token(input, &tok)))
+        return false;
+
+    // Assume prefix
+    *input += len;
+    if (tok.kind == NUMBER) {
+        *res = tok.val;
+    } else {
+        nul_f *nul_func = ops[tok.kind].nul_func;
+        if (!nul_func)
+            return false; // Error: not a prefix operator
+        if (!nul_func(input, res, prio))
+            return false; // Error: could not parse the operand
+    }
+
+    // Do left-loop
+    return parse_until_left(*res, input, res, prio);
+}
+
+// Evaluate the whole of `input` as one expression and store its value in
+// `*res`. Returns false on parse error or trailing garbage.
+bool eval_string(const char *input, int *res) {
+    if (!parse_until(&input, res, 0))
+        return false;
+    // Verify that only the expression exists
+    skip_whitespace(&input);
+    return *input == '\0';
+}
+
+// Define the handlers declared above, one set per operator type
+#define NulFunc(Name) \
+    static bool eval_ ## Name ## _nul( \
+            const char **input, \
+            int *res, \
+            __attribute__((unused)) int until)
+#define LeftFunc(Name) \
+    static bool eval_ ## Name ## _left( \
+            int lhs, \
+            const char **input, \
+            int *res, \
+            __attribute__((unused)) int until)
+
+#define PREFIX_POSTFIX(Name, NulPrio, LeftPrio, Operator) \
+    LeftFunc(Name) { \
+        return parse_until_left(lhs Operator, input, res, LeftPrio); \
+    } \
+    NulFunc(Name) { \
+        if (!parse_until(input, res, NulPrio)) \
+            return false; \
+        Operator *res; \
+        return true; \
+    }
+#define PREFIX(Name, NulPrio, LeftPrio, Operator) \
+    NulFunc(Name) { \
+        if (!parse_until(input, res, NulPrio)) \
+            return false; \
+        *res = Operator *res; \
+        return true; \
+    }
+#define PREFIX_INFIX(Name, NulPrio, LeftPrio, Operator) \
+    LeftFunc(Name) { \
+        if (!parse_until(input, res, LeftPrio)) \
+            return false; \
+        *res = lhs Operator *res; \
+        return true; \
+    } \
+    NulFunc(Name) { \
+        if (!parse_until(input, res, NulPrio)) \
+            return false; \
+        *res = Operator *res; \
+        return true; \
+    }
+#define INFIX(Name, NulPrio, LeftPrio, Operator) \
+    LeftFunc(Name) { \
+        if (!parse_until(input, res, LeftPrio)) \
+            return false; \
+        *res = lhs Operator *res; \
+        return true; \
+    }
+#define TERN(Name, NulPrio, LeftPrio, Operator) \
+    LeftFunc(Name) { \
+        int true_val; \
+        if (!parse_until(input, &true_val, 0) || !expect(COLON, input)) \
+            return false; \
+        int false_val; \
+        if (!parse_until(input, &false_val, until)) \
+            return false; \
+        *res = (lhs ? true_val : false_val); \
+        return true; \
+    }
+#define PAREN(Name, NulPrio, LeftPrio, Operator) \
+    NulFunc(Name) { \
+        if (!parse_until(input, res, 0)) \
+            return false; \
+        return expect(R_PAREN, input); \
+    }
+#define NOT_OP(Name, NulPrio, LeftPrio, Operator) /* Nothing */
+# define OP(Name, NulPrio, LeftPrio, Type, Operator, ...) \
+    Type(Name, NulPrio, LeftPrio, Operator)
+#include "operators.inc"
+
+#undef LeftFunc
+#undef NulFunc
diff --git a/src/eval.h b/src/eval.h
new file mode 100644
index 0000000..cd0d57d
--- /dev/null
+++ b/src/eval.h
@@ -0,0 +1,10 @@
+#ifndef EVAL_H
+#define EVAL_H
+
+#include <stdbool.h>
+
+// Evaluate `input` as an integer expression and store its value in `*res`.
+// Returns false if `input` could not be parsed entirely.
+bool eval_string(const char *input, int *res);
+
+#endif /* !EVAL_H */
diff --git a/src/operators.inc b/src/operators.inc
new file mode 100644
index 0000000..08b217e
--- /dev/null
+++ b/src/operators.inc
@@ -0,0 +1,45 @@
+/*
+ * 10 '?:' ternary (set to 0 in middle)
+ *
+ * Do not use any assignment yet
+ */
+#ifndef OP
+# define OP(Name, NulPrio, LeftPrio, Type, Operator, /* Operator String */ ...)
+#endif
+
+// Simple operators
+OP(  PLUS_PLUS, 120, 110, PREFIX_POSTFIX, ++, '+', '+', 0)
+OP(MINUS_MINUS, 120, 110, PREFIX_POSTFIX, --, '-', '-', 0)
+OP(        NOT, 120,  -1, PREFIX, !, '!', 0)
+OP(    BIT_NOT, 120,  -1, PREFIX, ~, '~', 0)
+OP(       PLUS, 110,  90, PREFIX_INFIX, +, '+', 0)
+OP(      MINUS, 110,  90, PREFIX_INFIX, -, '-', 0)
+OP(      TIMES,  -1, 100, INFIX, *, '*', 0)
+OP(    DIVIDES,  -1, 100, INFIX, /, '/', 0)
+OP(     MODULO,  -1, 100, INFIX, %, '%', 0)
+OP( LEFT_SHIFT,  -1,  80, INFIX, <<, '<', '<', 0)
+OP(RIGHT_SHIFT,  -1,  80, INFIX, >>, '>', '>', 0)
+OP(      EQUAL,  -1,  70, INFIX, ==, '=', '=', 0)
+OP(  NOT_EQUAL,  -1,  70, INFIX, !=, '!', '=', 0)
+OP(    BIT_AND,  -1,  60, INFIX, &, '&', 0)
+OP(    BIT_XOR,  -1,  50, INFIX, ^, '^', 0)
+OP(     BIT_OR,  -1,  40, INFIX, |, '|', 0)
+OP(        AND,  -1,  30, INFIX, &&, '&', '&', 0)
+OP(         OR,  -1,  20, INFIX, ||, '|', '|', 0)
+// Special operators
+OP(    TERNARY,  -1,  10, TERN, PLACEHOLDER, '?', 0)
+OP(      COLON,  -1,  -1, NOT_OP, PLACEHOLDER, ':', 0)
+OP(    L_PAREN,   0,   0, PAREN, PLACEHOLDER, '(', 0)
+OP(    R_PAREN,  -1,  -1, NOT_OP, PLACEHOLDER, ')', 0)
+// Special tokens
+OP(     NUMBER,   0,  -1, NOT_OP, PLACEHOLDER, 0)
+
+#undef OP
+
+#undef PREFIX_POSTFIX
+#undef PREFIX
+#undef PREFIX_INFIX
+#undef INFIX
+#undef TERN
+#undef PAREN
+#undef NOT_OP
diff --git a/src/pratt.c b/src/pratt.c
new file mode 100644
index 0000000..837592a
--- /dev/null
+++ b/src/pratt.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "eval.h"
+
+// Evaluate each line of stdin; exit status is 1 if any line failed to parse
+int main(void)
+{
+    char *line = NULL;
+    size_t size = 0;
+    int ret = 0;
+
+    while ((getline(&line, &size, stdin)) > 0)
+    {
+        int res;
+        if (!eval_string(line, &res))
+        {
+            fputs("Could not parse input\n", stderr);
+            ret = 1;
+            continue;
+        }
+
+        printf("%d\n", res);
+    }
+
+    free(line);
+
+    return ret;
+}