diff options
author | Ting-Wei Lan <lantw44@gmail.com> | 2015-12-06 22:46:08 +0800 |
---|---|---|
committer | Ting-Wei Lan <lantw44@gmail.com> | 2015-12-06 22:46:08 +0800 |
commit | 39b409c9625f427966ae577aa3caa4d8caaa56a0 (patch) | |
tree | 73c0602e13440813452fcdcdcbecfcc533bca0df | |
parent | 21e57e72f5d7417bc2704d0356d66c21c269114a (diff) | |
download | compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.gz compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.bz2 compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.lz compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.xz compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.zst compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.zip |
Implement semantic checks for type and variable declarations
-rw-r--r-- | src/draw.c | 64 | ||||
-rw-r--r-- | src/draw.h | 3 | ||||
-rw-r--r-- | src/semantic-analysis.c | 355 | ||||
-rw-r--r-- | src/symbol-table.c | 11 | ||||
-rw-r--r-- | src/symbol-table.h | 3 |
5 files changed, 434 insertions, 2 deletions
@@ -189,4 +189,68 @@ void ccmmc_draw_ast(FILE *fp, const char *name, CcmmcAst *root) fprintf(fp , "}\n"); } +static void print_symbol_type(FILE *fp, CcmmcSymbolType type) +{ + switch (type.type_base) { + case CCMMC_AST_VALUE_INT: + fputs("int", fp); + break; + case CCMMC_AST_VALUE_FLOAT: + fputs("float", fp); + break; + case CCMMC_AST_VALUE_VOID: + fputs("void", fp); + break; + case CCMMC_AST_VALUE_INT_PTR: + case CCMMC_AST_VALUE_FLOAT_PTR: + case CCMMC_AST_VALUE_CONST_STRING: + case CCMMC_AST_VALUE_NONE: + case CCMMC_AST_VALUE_ERROR: + default: + assert(false); + } + + if (ccmmc_symbol_type_is_array(type)) + for (size_t i = 0; i < type.array_dimension; i++) + fprintf(fp, "[%zu]", type.array_size[i]); + + if (ccmmc_symbol_type_is_function(type)) { + fputs(" (*)(", fp); + for (size_t i = 0; i < type.param_count; i++) { + print_symbol_type(fp, type.param_list[i]); + if (i == type.param_count - 1) + fputs(")", fp); + else + fputs(", ", fp); + } + } +} + +void ccmmc_draw_symbol_scope(FILE *fp, CcmmcSymbolScope *scope) +{ + for (int i = 0; i < CCMMC_SYMBOL_SCOPE_HASH_TABLE_SIZE; i++) { + for (CcmmcSymbol *symbol = scope->hash_table[i]; symbol != NULL; + symbol = symbol->next) { + fprintf(fp, "Bucket %d: %s, ", i, symbol->name); + switch (symbol->kind) { + case CCMMC_SYMBOL_KIND_TYPE: + fputs("TYPE, ", fp); + print_symbol_type(fp, symbol->type); + break; + case CCMMC_SYMBOL_KIND_VARIABLE: + fputs("VARIABLE, ", fp); + print_symbol_type(fp, symbol->type); + break; + case CCMMC_SYMBOL_KIND_FUNCTION: + fputs("FUNCTION, ", fp); + print_symbol_type(fp, symbol->type); + break; + default: + assert(false); + } + putc('\n', fp); + } + } +} + // vim: set sw=4 ts=4 sts=4 et: @@ -2,12 +2,15 @@ #define CCMMC_HEADER_DRAW_H #include "ast.h" +#include "symbol-table.h" #include <stdio.h> void ccmmc_draw_ast (FILE *fp, const char *name, CcmmcAst *root); +void ccmmc_draw_symbol_scope (FILE *fp, + CcmmcSymbolScope *scope); #endif // vim: set sw=4 ts=4 sts=4 et: diff --git a/src/semantic-analysis.c b/src/semantic-analysis.c index f7c1f9c..9bed272 100644 --- a/src/semantic-analysis.c +++ b/src/semantic-analysis.c @@ -3,13 +3,364 @@ #endif #include "semantic-analysis.h" +#include "common.h" +#include <assert.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define ERROR(msg) ("Error found in line %zu\n" msg "\n") -bool ccmmc_semantic_check (CcmmcAst *root, CcmmcSymbolTable *table) + +static CcmmcValueConst eval_const_expr(CcmmcAst *expr) { + if (expr->type_node == CCMMC_AST_NODE_CONST_VALUE) { + return expr->value_const; + } + assert(expr->type_node == CCMMC_AST_NODE_EXPR); + assert(expr->value_expr.kind == CCMMC_KIND_EXPR_BINARY_OP); + CcmmcValueConst left = eval_const_expr(expr->child); + CcmmcValueConst right = eval_const_expr(expr->child->right_sibling); + switch (expr->value_expr.op_binary) { + case CCMMC_KIND_OP_BINARY_ADD: + if (left.kind == CCMMC_KIND_CONST_INT) { + if (right.kind == CCMMC_KIND_CONST_INT) { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT, + .const_int = left.const_int + right.const_int }; + } else { + float left_value = left.const_int; + float right_value = right.const_float; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value + right_value }; + } + } else { + if (right.kind == CCMMC_KIND_CONST_INT) { + float left_value = left.const_float; + float right_value = right.const_int; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value + right_value }; + } else { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left.const_float + right.const_float }; + } + } + case CCMMC_KIND_OP_BINARY_SUB: + if (left.kind == CCMMC_KIND_CONST_INT) { + if (right.kind == CCMMC_KIND_CONST_INT) { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT, + .const_int = left.const_int - right.const_int }; + } else { + float left_value = left.const_int; + float right_value = right.const_float; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value - right_value }; + } + } else { + if (right.kind == CCMMC_KIND_CONST_INT) { + float left_value = left.const_float; + float right_value = right.const_int; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value - right_value }; + } else { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left.const_float - right.const_float }; + } + } + case CCMMC_KIND_OP_BINARY_MUL: + if (left.kind == CCMMC_KIND_CONST_INT) { + if (right.kind == CCMMC_KIND_CONST_INT) { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT, + .const_int = left.const_int * right.const_int }; + } else { + float left_value = left.const_int; + float right_value = right.const_float; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value * right_value }; + } + } else { + if (right.kind == CCMMC_KIND_CONST_INT) { + float left_value = left.const_float; + float right_value = right.const_int; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value * right_value }; + } else { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left.const_float * right.const_float }; + } + } + case CCMMC_KIND_OP_BINARY_DIV: + if (left.kind == CCMMC_KIND_CONST_INT) { + if (right.kind == CCMMC_KIND_CONST_INT) { + if (right.const_int == 0) { + fprintf(stderr, ERROR("Integer division by zero."), (size_t)0); + // XXX: We should use an invalid type + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_STRING }; + } + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT, + .const_int = left.const_int / right.const_int }; + } else { + float left_value = left.const_int; + float right_value = right.const_float; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value / right_value }; + } + } else { + if (right.kind == CCMMC_KIND_CONST_INT) { + float left_value = left.const_float; + float right_value = right.const_int; + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left_value / right_value }; + } else { + return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT, + .const_float = left.const_float / right.const_float }; + } + } + case CCMMC_KIND_OP_BINARY_EQ: + case CCMMC_KIND_OP_BINARY_GE: + case CCMMC_KIND_OP_BINARY_LE: + case CCMMC_KIND_OP_BINARY_NE: + case CCMMC_KIND_OP_BINARY_GT: + case CCMMC_KIND_OP_BINARY_LT: + case CCMMC_KIND_OP_BINARY_AND: + case CCMMC_KIND_OP_BINARY_OR: + default: + assert(false); + } +} + +static size_t *get_array_size(CcmmcAst *id_array, size_t *array_dimension) +{ + CcmmcAst *dim; + size_t dim_count = 0; + for (dim = id_array->child; dim != NULL; dim = dim->right_sibling, dim_count++); + assert(dim_count != 0); + + size_t dim_index = 0; + size_t *array_size = malloc(sizeof(size_t) * dim_count); + ERR_FATAL_CHECK(array_size, malloc); + for (dim = id_array->child; dim != NULL; dim = dim->right_sibling, dim_index++) { + CcmmcValueConst value = eval_const_expr(dim); + if (value.kind != CCMMC_KIND_CONST_INT) { + fprintf(stderr, ERROR("Array subscript is not an integer."), (size_t)0); + free(array_size); + return NULL; + } + if (value.const_int <= 0) { + fprintf(stderr, ERROR("Array size must be positive."), (size_t)0); + free(array_size); + return NULL; + } + array_size[dim_index] = value.const_int; + } + + *array_dimension = dim_count; + return array_size; +} + +static size_t *get_array_of_array_size(CcmmcAst *id_array, size_t *array_dimension, + size_t base_array_dimension, size_t *base_array_size) +{ + size_t this_array_dimension; + size_t *this_array_size = get_array_size(id_array, &this_array_dimension); + if (this_array_size == NULL) { + free (this_array_size); + return NULL; + } + + size_t dim_count = this_array_dimension + base_array_dimension; + size_t *array_size = realloc(this_array_size, sizeof(size_t) * dim_count); + ERR_FATAL_CHECK(array_size, realloc); + memcpy(array_size + this_array_dimension, + base_array_size, sizeof(size_t) * base_array_dimension); + + *array_dimension = dim_count; + return array_size; +} + +static bool process_typedef(CcmmcAst *type_decl, CcmmcSymbolTable *table) +{ + bool any_error = false; + + // The leftmost child is an existing type + assert(type_decl->child != NULL); + assert(type_decl->child->type_node == CCMMC_AST_NODE_ID); + assert(type_decl->child->value_id.kind == CCMMC_KIND_ID_NORMAL); + const char *source_str = type_decl->child->value_id.name; + CcmmcSymbol *source_sym = ccmmc_symbol_table_retrive(table, source_str); + // We don't support an existing array typedef + assert(ccmmc_symbol_is_scalar(source_sym)); + if (source_sym == NULL) { + fprintf(stderr, ERROR("ID `%s' undeclared."), (size_t)0, source_str); + return true; + } + if (source_sym->kind != CCMMC_SYMBOL_KIND_TYPE) { + fprintf(stderr, ERROR("ID `%s' is not a type."), (size_t)0, source_str); + return true; + } + + // Other children are newly declared types + for (CcmmcAst *id = type_decl->child->right_sibling; id != NULL; + id = id->right_sibling) { + assert(id->type_node == CCMMC_AST_NODE_ID); + const char *target_str = id->value_id.name; + if (ccmmc_symbol_scope_exist(table->current, target_str)) { + any_error = true; + fprintf (stderr, ERROR("ID `%s' redeclared."), (size_t)0, target_str); + continue; + } + switch (id->value_id.kind) { + case CCMMC_KIND_ID_NORMAL: + ccmmc_symbol_table_insert(table, + target_str, CCMMC_SYMBOL_KIND_TYPE, source_sym->type); + break; + case CCMMC_KIND_ID_ARRAY: { + size_t array_dimension; + size_t *array_size = get_array_size(id, &array_dimension); + if (array_size == NULL) + any_error = true; + CcmmcSymbolType type = { + .type_base = source_sym->type.type_base, + .array_dimension = array_dimension, + .array_size = array_size }; + ccmmc_symbol_table_insert(table, + target_str, CCMMC_SYMBOL_KIND_TYPE, type); + } break; + case CCMMC_KIND_ID_WITH_INIT: + default: + assert(false); + } + } + + return any_error; +} + +static bool process_relop_expr(CcmmcAst *expr, CcmmcSymbolTable *table) +{ + bool any_error = false; + return any_error; +} + +static bool process_variable(CcmmcAst *var_decl, CcmmcSymbolTable *table) +{ + bool any_error = false; + + // The leftmost child is the type of the variable declaration list + assert(var_decl->child != NULL); + assert(var_decl->child->type_node == CCMMC_AST_NODE_ID); + assert(var_decl->child->value_id.kind == CCMMC_KIND_ID_NORMAL); + const char *type_str = var_decl->child->value_id.name; + CcmmcSymbol *type_sym = ccmmc_symbol_table_retrive(table, type_str); + if (type_sym == NULL) { + fprintf(stderr, ERROR("ID `%s' undeclared."), (size_t)0, type_str); + return true; + } + if (type_sym->kind != CCMMC_SYMBOL_KIND_TYPE) { + fprintf(stderr, ERROR("ID `%s' is not a type."), (size_t)0, type_str); + return true; + } + + // Other children are newly declared variables + for (CcmmcAst *init_id = var_decl->child->right_sibling; init_id != NULL; + init_id = init_id->right_sibling) { + assert(init_id->type_node = CCMMC_AST_NODE_ID); + const char *var_str = init_id->value_id.name; + if (ccmmc_symbol_scope_exist(table->current, var_str)) { + any_error = true; + fprintf (stderr, ERROR("ID `%s' redeclared."), (size_t)0, var_str); + continue; + } + switch (init_id->value_id.kind) { + case CCMMC_KIND_ID_NORMAL: + ccmmc_symbol_table_insert(table, + var_str, CCMMC_SYMBOL_KIND_VARIABLE, type_sym->type); + break; + case CCMMC_KIND_ID_ARRAY: { + size_t array_dimension; + size_t *array_size; + if (ccmmc_symbol_is_array(type_sym)) + array_size = get_array_of_array_size( + init_id, &array_dimension, + type_sym->type.array_dimension, + type_sym->type.array_size); + else + array_size = get_array_size (init_id, &array_dimension); + if (array_size == NULL) { + any_error = true; + continue; + } + CcmmcSymbolType type = { + .type_base = type_sym->type.type_base, + .array_dimension = array_dimension, + .array_size = array_size }; + ccmmc_symbol_table_insert(table, + var_str, CCMMC_SYMBOL_KIND_VARIABLE, type); + } break; + case CCMMC_KIND_ID_WITH_INIT: { + assert(ccmmc_symbol_is_scalar(type_sym)); + if (process_relop_expr(init_id, table)) { + any_error = true; + continue; + } + ccmmc_symbol_table_insert(table, + var_str, CCMMC_SYMBOL_KIND_VARIABLE, type_sym->type); + } break; + default: + assert(false); + } + } + return any_error; +} + +static bool process_function(CcmmcAst *function_decl, CcmmcSymbolTable *table) +{ + bool any_error = false; + return any_error; +} + +static bool process_program(CcmmcAst *program, CcmmcSymbolTable *table) +{ + bool any_error = false; + // Process the list of global declarations + for (CcmmcAst *global_decl = program->child; global_decl != NULL; + global_decl = global_decl->right_sibling) { + assert(global_decl->type_node == CCMMC_AST_NODE_DECL); + switch (global_decl->value_decl.kind) { + case CCMMC_KIND_DECL_TYPE: + any_error = any_error || process_typedef(global_decl, table); + break; + case CCMMC_KIND_DECL_VARIABLE: + any_error = any_error || process_variable(global_decl, table); + break; + case CCMMC_KIND_DECL_FUNCTION: + any_error = any_error || process_function(global_decl, table); + break; + case CCMMC_KIND_DECL_FUNCTION_PARAMETER: + default: + assert(false); + } + } + return any_error; +} + +bool ccmmc_semantic_check(CcmmcAst *root, CcmmcSymbolTable *table) { - return true; + bool any_error = false; + // The symbol table must be empty + assert(table->all == NULL && table->all_last == NULL); + assert(table->current == NULL); + // Push the global scope + ccmmc_symbol_table_open_scope(table); + // Insert builtin types + ccmmc_symbol_table_insert(table, "int", CCMMC_SYMBOL_KIND_TYPE, + (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_INT }); + ccmmc_symbol_table_insert(table, "float", CCMMC_SYMBOL_KIND_TYPE, + (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_FLOAT }); + ccmmc_symbol_table_insert(table, "void", CCMMC_SYMBOL_KIND_TYPE, + (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_VOID }); + // Start processing from the program node + any_error = any_error || process_program(root, table); + return !any_error; } // vim: set sw=4 ts=4 sts=4 et: diff --git a/src/symbol-table.c b/src/symbol-table.c index bd00d23..26a104d 100644 --- a/src/symbol-table.c +++ b/src/symbol-table.c @@ -82,4 +82,15 @@ CcmmcSymbol *ccmmc_symbol_table_retrive ( return NULL; } +bool ccmmc_symbol_scope_exist(CcmmcSymbolScope *scope, const char *name) +{ + int key = hash(name); + for (CcmmcSymbol *symbol = scope->hash_table[key]; + symbol != NULL; symbol = symbol->next) { + if (strcmp(name, symbol->name) == 0) + return true; + } + return false; +} + // vim: set sw=4 ts=4 sts=4 et: diff --git a/src/symbol-table.h b/src/symbol-table.h index 9038366..3a50083 100644 --- a/src/symbol-table.h +++ b/src/symbol-table.h @@ -78,6 +78,9 @@ void ccmmc_symbol_table_insert (CcmmcSymbolTable *table, CcmmcSymbolType type); CcmmcSymbol *ccmmc_symbol_table_retrive (CcmmcSymbolTable *table, const char *name); +bool ccmmc_symbol_scope_exist (CcmmcSymbolScope *scope, + const char *name); + #endif // vim: set sw=4 ts=4 sts=4 et: |