summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTing-Wei Lan <lantw44@gmail.com>2015-12-06 22:46:08 +0800
committerTing-Wei Lan <lantw44@gmail.com>2015-12-06 22:46:08 +0800
commit39b409c9625f427966ae577aa3caa4d8caaa56a0 (patch)
tree73c0602e13440813452fcdcdcbecfcc533bca0df
parent21e57e72f5d7417bc2704d0356d66c21c269114a (diff)
downloadcompiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.gz
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.bz2
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.lz
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.xz
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.tar.zst
compiler2015-39b409c9625f427966ae577aa3caa4d8caaa56a0.zip
Implement semantic checks for type and variable declarations
-rw-r--r--src/draw.c64
-rw-r--r--src/draw.h3
-rw-r--r--src/semantic-analysis.c355
-rw-r--r--src/symbol-table.c11
-rw-r--r--src/symbol-table.h3
5 files changed, 434 insertions, 2 deletions
diff --git a/src/draw.c b/src/draw.c
index db0d707..61ea781 100644
--- a/src/draw.c
+++ b/src/draw.c
@@ -189,4 +189,68 @@ void ccmmc_draw_ast(FILE *fp, const char *name, CcmmcAst *root)
fprintf(fp , "}\n");
}
+static void print_symbol_type(FILE *fp, CcmmcSymbolType type)
+{
+ switch (type.type_base) {
+ case CCMMC_AST_VALUE_INT:
+ fputs("int", fp);
+ break;
+ case CCMMC_AST_VALUE_FLOAT:
+ fputs("float", fp);
+ break;
+ case CCMMC_AST_VALUE_VOID:
+ fputs("void", fp);
+ break;
+ case CCMMC_AST_VALUE_INT_PTR:
+ case CCMMC_AST_VALUE_FLOAT_PTR:
+ case CCMMC_AST_VALUE_CONST_STRING:
+ case CCMMC_AST_VALUE_NONE:
+ case CCMMC_AST_VALUE_ERROR:
+ default:
+ assert(false);
+ }
+
+ if (ccmmc_symbol_type_is_array(type))
+ for (size_t i = 0; i < type.array_dimension; i++)
+ fprintf(fp, "[%zu]", type.array_size[i]);
+
+ if (ccmmc_symbol_type_is_function(type)) {
+ fputs(" (*)(", fp);
+ for (size_t i = 0; i < type.param_count; i++) {
+ print_symbol_type(fp, type.param_list[i]);
+ if (i == type.param_count - 1)
+ fputs(")", fp);
+ else
+ fputs(", ", fp);
+ }
+ }
+}
+
+void ccmmc_draw_symbol_scope(FILE *fp, CcmmcSymbolScope *scope)
+{
+ for (int i = 0; i < CCMMC_SYMBOL_SCOPE_HASH_TABLE_SIZE; i++) {
+ for (CcmmcSymbol *symbol = scope->hash_table[i]; symbol != NULL;
+ symbol = symbol->next) {
+ fprintf(fp, "Bucket %d: %s, ", i, symbol->name);
+ switch (symbol->kind) {
+ case CCMMC_SYMBOL_KIND_TYPE:
+ fputs("TYPE, ", fp);
+ print_symbol_type(fp, symbol->type);
+ break;
+ case CCMMC_SYMBOL_KIND_VARIABLE:
+ fputs("VARIABLE, ", fp);
+ print_symbol_type(fp, symbol->type);
+ break;
+ case CCMMC_SYMBOL_KIND_FUNCTION:
+ fputs("FUNCTION, ", fp);
+ print_symbol_type(fp, symbol->type);
+ break;
+ default:
+ assert(false);
+ }
+ putc('\n', fp);
+ }
+ }
+}
+
// vim: set sw=4 ts=4 sts=4 et:
diff --git a/src/draw.h b/src/draw.h
index 9636ac0..d32b9ad 100644
--- a/src/draw.h
+++ b/src/draw.h
@@ -2,12 +2,15 @@
#define CCMMC_HEADER_DRAW_H
#include "ast.h"
+#include "symbol-table.h"
#include <stdio.h>
void ccmmc_draw_ast (FILE *fp,
const char *name,
CcmmcAst *root);
+void ccmmc_draw_symbol_scope (FILE *fp,
+ CcmmcSymbolScope *scope);
#endif
// vim: set sw=4 ts=4 sts=4 et:
diff --git a/src/semantic-analysis.c b/src/semantic-analysis.c
index f7c1f9c..9bed272 100644
--- a/src/semantic-analysis.c
+++ b/src/semantic-analysis.c
@@ -3,13 +3,364 @@
#endif
#include "semantic-analysis.h"
+#include "common.h"
+#include <assert.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define ERROR(msg) ("Error found in line %zu\n" msg "\n")
-bool ccmmc_semantic_check (CcmmcAst *root, CcmmcSymbolTable *table)
+
+static CcmmcValueConst eval_const_expr(CcmmcAst *expr) {
+ if (expr->type_node == CCMMC_AST_NODE_CONST_VALUE) {
+ return expr->value_const;
+ }
+ assert(expr->type_node == CCMMC_AST_NODE_EXPR);
+ assert(expr->value_expr.kind == CCMMC_KIND_EXPR_BINARY_OP);
+ CcmmcValueConst left = eval_const_expr(expr->child);
+ CcmmcValueConst right = eval_const_expr(expr->child->right_sibling);
+ switch (expr->value_expr.op_binary) {
+ case CCMMC_KIND_OP_BINARY_ADD:
+ if (left.kind == CCMMC_KIND_CONST_INT) {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT,
+ .const_int = left.const_int + right.const_int };
+ } else {
+ float left_value = left.const_int;
+ float right_value = right.const_float;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value + right_value };
+ }
+ } else {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ float left_value = left.const_float;
+ float right_value = right.const_int;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value + right_value };
+ } else {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left.const_float + right.const_float };
+ }
+ }
+ case CCMMC_KIND_OP_BINARY_SUB:
+ if (left.kind == CCMMC_KIND_CONST_INT) {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT,
+ .const_int = left.const_int - right.const_int };
+ } else {
+ float left_value = left.const_int;
+ float right_value = right.const_float;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value - right_value };
+ }
+ } else {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ float left_value = left.const_float;
+ float right_value = right.const_int;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value - right_value };
+ } else {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left.const_float - right.const_float };
+ }
+ }
+ case CCMMC_KIND_OP_BINARY_MUL:
+ if (left.kind == CCMMC_KIND_CONST_INT) {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT,
+ .const_int = left.const_int * right.const_int };
+ } else {
+ float left_value = left.const_int;
+ float right_value = right.const_float;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value * right_value };
+ }
+ } else {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ float left_value = left.const_float;
+ float right_value = right.const_int;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value * right_value };
+ } else {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left.const_float * right.const_float };
+ }
+ }
+ case CCMMC_KIND_OP_BINARY_DIV:
+ if (left.kind == CCMMC_KIND_CONST_INT) {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ if (right.const_int == 0) {
+ fprintf(stderr, ERROR("Integer division by zero."), (size_t)0);
+ // XXX: We should use an invalid type
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_STRING };
+ }
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_INT,
+ .const_int = left.const_int / right.const_int };
+ } else {
+ float left_value = left.const_int;
+ float right_value = right.const_float;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value / right_value };
+ }
+ } else {
+ if (right.kind == CCMMC_KIND_CONST_INT) {
+ float left_value = left.const_float;
+ float right_value = right.const_int;
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left_value / right_value };
+ } else {
+ return (CcmmcValueConst){ .kind = CCMMC_KIND_CONST_FLOAT,
+ .const_float = left.const_float / right.const_float };
+ }
+ }
+ case CCMMC_KIND_OP_BINARY_EQ:
+ case CCMMC_KIND_OP_BINARY_GE:
+ case CCMMC_KIND_OP_BINARY_LE:
+ case CCMMC_KIND_OP_BINARY_NE:
+ case CCMMC_KIND_OP_BINARY_GT:
+ case CCMMC_KIND_OP_BINARY_LT:
+ case CCMMC_KIND_OP_BINARY_AND:
+ case CCMMC_KIND_OP_BINARY_OR:
+ default:
+ assert(false);
+ }
+}
+
+static size_t *get_array_size(CcmmcAst *id_array, size_t *array_dimension)
+{
+ CcmmcAst *dim;
+ size_t dim_count = 0;
+ for (dim = id_array->child; dim != NULL; dim = dim->right_sibling, dim_count++);
+ assert(dim_count != 0);
+
+ size_t dim_index = 0;
+ size_t *array_size = malloc(sizeof(size_t) * dim_count);
+ ERR_FATAL_CHECK(array_size, malloc);
+ for (dim = id_array->child; dim != NULL; dim = dim->right_sibling, dim_index++) {
+ CcmmcValueConst value = eval_const_expr(dim);
+ if (value.kind != CCMMC_KIND_CONST_INT) {
+ fprintf(stderr, ERROR("Array subscript is not an integer."), (size_t)0);
+ free(array_size);
+ return NULL;
+ }
+ if (value.const_int <= 0) {
+ fprintf(stderr, ERROR("Array size must be positive."), (size_t)0);
+ free(array_size);
+ return NULL;
+ }
+ array_size[dim_index] = value.const_int;
+ }
+
+ *array_dimension = dim_count;
+ return array_size;
+}
+
+static size_t *get_array_of_array_size(CcmmcAst *id_array, size_t *array_dimension,
+ size_t base_array_dimension, size_t *base_array_size)
+{
+ size_t this_array_dimension;
+ size_t *this_array_size = get_array_size(id_array, &this_array_dimension);
+ if (this_array_size == NULL) {
+ free (this_array_size);
+ return NULL;
+ }
+
+ size_t dim_count = this_array_dimension + base_array_dimension;
+ size_t *array_size = realloc(this_array_size, sizeof(size_t) * dim_count);
+ ERR_FATAL_CHECK(array_size, realloc);
+ memcpy(array_size + this_array_dimension,
+ base_array_size, sizeof(size_t) * base_array_dimension);
+
+ *array_dimension = dim_count;
+ return array_size;
+}
+
+static bool process_typedef(CcmmcAst *type_decl, CcmmcSymbolTable *table)
+{
+ bool any_error = false;
+
+ // The leftmost child is an existing type
+ assert(type_decl->child != NULL);
+ assert(type_decl->child->type_node == CCMMC_AST_NODE_ID);
+ assert(type_decl->child->value_id.kind == CCMMC_KIND_ID_NORMAL);
+ const char *source_str = type_decl->child->value_id.name;
+ CcmmcSymbol *source_sym = ccmmc_symbol_table_retrive(table, source_str);
+ // We don't support an existing array typedef
+ assert(ccmmc_symbol_is_scalar(source_sym));
+ if (source_sym == NULL) {
+ fprintf(stderr, ERROR("ID `%s' undeclared."), (size_t)0, source_str);
+ return true;
+ }
+ if (source_sym->kind != CCMMC_SYMBOL_KIND_TYPE) {
+ fprintf(stderr, ERROR("ID `%s' is not a type."), (size_t)0, source_str);
+ return true;
+ }
+
+ // Other children are newly declared types
+ for (CcmmcAst *id = type_decl->child->right_sibling; id != NULL;
+ id = id->right_sibling) {
+ assert(id->type_node == CCMMC_AST_NODE_ID);
+ const char *target_str = id->value_id.name;
+ if (ccmmc_symbol_scope_exist(table->current, target_str)) {
+ any_error = true;
+ fprintf (stderr, ERROR("ID `%s' redeclared."), (size_t)0, target_str);
+ continue;
+ }
+ switch (id->value_id.kind) {
+ case CCMMC_KIND_ID_NORMAL:
+ ccmmc_symbol_table_insert(table,
+ target_str, CCMMC_SYMBOL_KIND_TYPE, source_sym->type);
+ break;
+ case CCMMC_KIND_ID_ARRAY: {
+ size_t array_dimension;
+ size_t *array_size = get_array_size(id, &array_dimension);
+ if (array_size == NULL)
+ any_error = true;
+ CcmmcSymbolType type = {
+ .type_base = source_sym->type.type_base,
+ .array_dimension = array_dimension,
+ .array_size = array_size };
+ ccmmc_symbol_table_insert(table,
+ target_str, CCMMC_SYMBOL_KIND_TYPE, type);
+ } break;
+ case CCMMC_KIND_ID_WITH_INIT:
+ default:
+ assert(false);
+ }
+ }
+
+ return any_error;
+}
+
+static bool process_relop_expr(CcmmcAst *expr, CcmmcSymbolTable *table)
+{
+ bool any_error = false;
+ return any_error;
+}
+
+static bool process_variable(CcmmcAst *var_decl, CcmmcSymbolTable *table)
+{
+ bool any_error = false;
+
+ // The leftmost child is the type of the variable declaration list
+ assert(var_decl->child != NULL);
+ assert(var_decl->child->type_node == CCMMC_AST_NODE_ID);
+ assert(var_decl->child->value_id.kind == CCMMC_KIND_ID_NORMAL);
+ const char *type_str = var_decl->child->value_id.name;
+ CcmmcSymbol *type_sym = ccmmc_symbol_table_retrive(table, type_str);
+ if (type_sym == NULL) {
+ fprintf(stderr, ERROR("ID `%s' undeclared."), (size_t)0, type_str);
+ return true;
+ }
+ if (type_sym->kind != CCMMC_SYMBOL_KIND_TYPE) {
+ fprintf(stderr, ERROR("ID `%s' is not a type."), (size_t)0, type_str);
+ return true;
+ }
+
+ // Other children are newly declared variables
+ for (CcmmcAst *init_id = var_decl->child->right_sibling; init_id != NULL;
+ init_id = init_id->right_sibling) {
+ assert(init_id->type_node = CCMMC_AST_NODE_ID);
+ const char *var_str = init_id->value_id.name;
+ if (ccmmc_symbol_scope_exist(table->current, var_str)) {
+ any_error = true;
+ fprintf (stderr, ERROR("ID `%s' redeclared."), (size_t)0, var_str);
+ continue;
+ }
+ switch (init_id->value_id.kind) {
+ case CCMMC_KIND_ID_NORMAL:
+ ccmmc_symbol_table_insert(table,
+ var_str, CCMMC_SYMBOL_KIND_VARIABLE, type_sym->type);
+ break;
+ case CCMMC_KIND_ID_ARRAY: {
+ size_t array_dimension;
+ size_t *array_size;
+ if (ccmmc_symbol_is_array(type_sym))
+ array_size = get_array_of_array_size(
+ init_id, &array_dimension,
+ type_sym->type.array_dimension,
+ type_sym->type.array_size);
+ else
+ array_size = get_array_size (init_id, &array_dimension);
+ if (array_size == NULL) {
+ any_error = true;
+ continue;
+ }
+ CcmmcSymbolType type = {
+ .type_base = type_sym->type.type_base,
+ .array_dimension = array_dimension,
+ .array_size = array_size };
+ ccmmc_symbol_table_insert(table,
+ var_str, CCMMC_SYMBOL_KIND_VARIABLE, type);
+ } break;
+ case CCMMC_KIND_ID_WITH_INIT: {
+ assert(ccmmc_symbol_is_scalar(type_sym));
+ if (process_relop_expr(init_id, table)) {
+ any_error = true;
+ continue;
+ }
+ ccmmc_symbol_table_insert(table,
+ var_str, CCMMC_SYMBOL_KIND_VARIABLE, type_sym->type);
+ } break;
+ default:
+ assert(false);
+ }
+ }
+ return any_error;
+}
+
+static bool process_function(CcmmcAst *function_decl, CcmmcSymbolTable *table)
+{
+ bool any_error = false;
+ return any_error;
+}
+
+static bool process_program(CcmmcAst *program, CcmmcSymbolTable *table)
+{
+ bool any_error = false;
+ // Process the list of global declarations
+ for (CcmmcAst *global_decl = program->child; global_decl != NULL;
+ global_decl = global_decl->right_sibling) {
+ assert(global_decl->type_node == CCMMC_AST_NODE_DECL);
+ switch (global_decl->value_decl.kind) {
+ case CCMMC_KIND_DECL_TYPE:
+ any_error = any_error || process_typedef(global_decl, table);
+ break;
+ case CCMMC_KIND_DECL_VARIABLE:
+ any_error = any_error || process_variable(global_decl, table);
+ break;
+ case CCMMC_KIND_DECL_FUNCTION:
+ any_error = any_error || process_function(global_decl, table);
+ break;
+ case CCMMC_KIND_DECL_FUNCTION_PARAMETER:
+ default:
+ assert(false);
+ }
+ }
+ return any_error;
+}
+
+bool ccmmc_semantic_check(CcmmcAst *root, CcmmcSymbolTable *table)
{
- return true;
+ bool any_error = false;
+ // The symbol table must be empty
+ assert(table->all == NULL && table->all_last == NULL);
+ assert(table->current == NULL);
+ // Push the global scope
+ ccmmc_symbol_table_open_scope(table);
+ // Insert builtin types
+ ccmmc_symbol_table_insert(table, "int", CCMMC_SYMBOL_KIND_TYPE,
+ (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_INT });
+ ccmmc_symbol_table_insert(table, "float", CCMMC_SYMBOL_KIND_TYPE,
+ (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_FLOAT });
+ ccmmc_symbol_table_insert(table, "void", CCMMC_SYMBOL_KIND_TYPE,
+ (CcmmcSymbolType){ .type_base = CCMMC_AST_VALUE_VOID });
+ // Start processing from the program node
+ any_error = any_error || process_program(root, table);
+ return !any_error;
}
// vim: set sw=4 ts=4 sts=4 et:
diff --git a/src/symbol-table.c b/src/symbol-table.c
index bd00d23..26a104d 100644
--- a/src/symbol-table.c
+++ b/src/symbol-table.c
@@ -82,4 +82,15 @@ CcmmcSymbol *ccmmc_symbol_table_retrive (
return NULL;
}
+bool ccmmc_symbol_scope_exist(CcmmcSymbolScope *scope, const char *name)
+{
+ int key = hash(name);
+ for (CcmmcSymbol *symbol = scope->hash_table[key];
+ symbol != NULL; symbol = symbol->next) {
+ if (strcmp(name, symbol->name) == 0)
+ return true;
+ }
+ return false;
+}
+
// vim: set sw=4 ts=4 sts=4 et:
diff --git a/src/symbol-table.h b/src/symbol-table.h
index 9038366..3a50083 100644
--- a/src/symbol-table.h
+++ b/src/symbol-table.h
@@ -78,6 +78,9 @@ void ccmmc_symbol_table_insert (CcmmcSymbolTable *table,
CcmmcSymbolType type);
CcmmcSymbol *ccmmc_symbol_table_retrive (CcmmcSymbolTable *table,
const char *name);
+bool ccmmc_symbol_scope_exist (CcmmcSymbolScope *scope,
+ const char *name);
+
#endif
// vim: set sw=4 ts=4 sts=4 et: