Repository: jserv/MazuCC Branch: master Commit: 35c554cbc3fa Files: 30 Total size: 93.7 KB Directory structure: gitextract_yg7l8jph/ ├── .clang-format ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── codegen_x64.c ├── dict.h ├── lexer.c ├── list.h ├── main.c ├── mzcc.h ├── parser.c ├── sample/ │ └── nqueen.c ├── tests/ │ ├── arith.c │ ├── array.c │ ├── comp.c │ ├── control.c │ ├── decl.c │ ├── driver.sh │ ├── float.c │ ├── function.c │ ├── global.c │ ├── long.c │ ├── pointer.c │ ├── pointer_arith.c │ ├── scope.c │ ├── struct.c │ └── union.c ├── util.h └── verbose.c ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ BasedOnStyle: Chromium Language: Cpp MaxEmptyLinesToKeep: 3 IndentCaseLabels: false AllowShortIfStatementsOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortLoopsOnASingleLine: false DerivePointerAlignment: false PointerAlignment: Right SpaceAfterCStyleCast: true TabWidth: 4 UseTab: Never IndentWidth: 4 BreakBeforeBraces: Linux AccessModifierOffset: -4 ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH - list_for_each - list_for_each_safe - list_for_each_entry - list_for_each_entry_safe - hlist_for_each_entry - rb_list_foreach - rb_list_foreach_safe ================================================ FILE: .gitignore ================================================ mzcc *.o *.o.d tmp.* nqueen.s nqueen *.bin *.dSYM .cbuild ================================================ FILE: LICENSE ================================================ Copyright (c) 2019-2020 National Cheng Kung University, Taiwan. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: Makefile ================================================ TARGET = mzcc CFLAGS = -Wall -Werror -std=gnu99 -g -I. 
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
CFLAGS += -no-pie
endif

# Parse-time side effect: write CBUILD="<compiler> <flags>" into .cbuild.
# NOTE(review): presumably read by tests/driver.sh -- confirm.
SHELL_HACK := $(shell echo CBUILD=\"$(CC) $(CFLAGS)\" > .cbuild)

# Remove a target whose recipe failed, so that a half-written file is never
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

# Control the build verbosity
# `make V=1` is equal to `make VERBOSE=1`
ifeq ("$(origin V)", "command line")
    VERBOSE = $(V)
endif
ifeq ("$(VERBOSE)","1")
    Q :=
    VECHO = @true
else
    Q := @
    VECHO = @printf
endif

OBJS = lexer.o codegen_x64.o parser.o verbose.o main.o
deps := $(OBJS:%.o=.%.o.d)

%.o: %.c
	$(VECHO) " CC\t$@\n"
	$(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF .$@.d $<

$(TARGET): $(OBJS)
	$(VECHO) " LD\t$@\n"
	$(Q)$(CC) $(CFLAGS) -o $@ $^

TESTS := $(patsubst %.c,%.bin,$(wildcard tests/*.c))

# \033 is ESC in octal; unlike \e it is defined for every POSIX printf.
PASS_COLOR = \033[32;01m
FAIL_COLOR = \033[31;01m
NO_COLOR = \033[0m

pass = printf "[ $(PASS_COLOR)Passed$(NO_COLOR) ]\n"
fail = printf "[ $(FAIL_COLOR)Failed$(NO_COLOR) ]\n"

# Build the sample and every tests/*.bin, then run each test binary.
# A test only counts as passed when it exits with status 0; the first failing
# test aborts the run so `make check` itself fails.
check: nqueen $(TESTS)
	@echo
	@for test in $(TESTS); do \
	    printf "*** verify $$test ***\n"; \
	    head -n 1 "$${test%.bin}.c"; \
	    if ./$$test; then \
	        $(pass); \
	    else \
	        $(fail); \
	        exit 1; \
	    fi; \
	    echo; \
	done
	tests/driver.sh

tests/%.s: tests/%.c $(TARGET)
	./$(TARGET) -o $@ $<

tests/%.bin: tests/%.s $(TARGET)
	$(VECHO) " CC\t$@\n"
	$(Q)$(CC) $(CFLAGS) -o $@ $<

# Produces sample/nqueen.s and sample/nqueen, never a file named "nqueen",
# hence the .PHONY declaration below.
nqueen: sample/nqueen.c $(TARGET)
	$(VECHO) " MazuCC\t$<\n"
	$(Q)./$(TARGET) -o ${<:.c=.s} $<
	$(VECHO) " AS+LD\t\t$@\n"
	$(Q)$(CC) $(CFLAGS) -o sample/nqueen sample/nqueen.s

.PHONY: clean check nqueen
clean:
	$(RM) $(TARGET) $(TESTS) $(TESTS:.bin=.s) $(OBJS) $(deps) .cbuild
	$(RM) sample/*.o sample/nqueen.s sample/nqueen

-include $(deps)

================================================
FILE: README.md
================================================
# MazuCC

MazuCC is a minimalist C compiler with x86_64 code generation.
It is intended to support partial C99 language features while keeping the code
as small and simple as possible.

## Build

Run make to build:
```shell
$ make
```

MazuCC comes with unit tests. To run the tests, give "check" as an argument:
```shell
$ make check
```

MazuCC is known to work on both GNU/Linux and macOS.

Use MazuCC to compile C source:
```shell
$ ./mzcc sample/nqueen.c
```

Alternatively, MazuCC accepts the stream from standard input.
The equivalent form for the above command is:
```shell
$ cat sample/nqueen.c | ./mzcc -
```

You will get the generated x86_64 assembly in AT&T syntax. The output can be
assembled and linked into a valid executable:
```shell
$ ./mzcc -o sample/nqueen.s sample/nqueen.c
$ gcc -no-pie -o sample/nqueen sample/nqueen.s
```

If MazuCC is compiled and executed on macOS, omit the `-no-pie` argument above.

Reference output of MazuCC-compiled `sample/nqueen`:
```
Q . . . . . . .
. . . . Q . . .
. . . . . . . Q
. . . . . Q . .
. . Q . . . . .
. . . . . . Q .
. Q . . . . . .
. . . Q . . . .
```

Alternatively, you can dump the internal abstract syntax tree:
```shell
echo 'struct {int x; char y; struct { int t; } z; } a;' | ./mzcc --dump-ast -
```

The expected output in S-expression form:
```
(decl (struct (int) (char) ((struct (int)))) a)
```

## Acknowledgements

MazuCC is heavily inspired by [8cc](https://github.com/rui314/8cc).

## License

MazuCC is freely redistributable under the BSD 2-Clause License.
Use of this source code is governed by a BSD-style license that can be found
in the LICENSE file.

================================================
FILE: codegen_x64.c
================================================
#include #include #include "mzcc.h" static char *REGS[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"}; static int TAB = 8; static List *functions = &EMPTY_LIST; /* FIXME: main program should take extern variables from codegen. */ FILE *outfp; static int stackpos; static void emit_expr(Ast *ast); static void emit_load_deref(Ctype *result_type, Ctype *operand_type, int off); #define emit(...) emitf(__LINE__, "\t" __VA_ARGS__) #define emit_label(...) 
emitf(__LINE__, __VA_ARGS__) #define UNUSED __attribute__((unused)) #define SAVE \ int save_hook __attribute__((cleanup(pop_function))) UNUSED; \ list_push(functions, (void *) __func__) static void pop_function(void *ignore UNUSED) { list_pop(functions); } static char *get_caller_list(void) { String s = make_string(); for (Iter i = list_iter(functions); !iter_end(i);) { string_appendf(&s, "%s", iter_next(&i)); if (!iter_end(i)) string_appendf(&s, " -> "); } return get_cstring(s); } static void emitf(int line, char *fmt, ...) { va_list args; va_start(args, fmt); int col = vfprintf(outfp, fmt, args); va_end(args); for (char *p = fmt; *p; p++) if (*p == '\t') col += TAB - 1; int space = (28 - col) > 0 ? (30 - col) : 2; fprintf(outfp, "%*c %s:%d\n", space, '#', get_caller_list(), line); } static char *get_int_reg(Ctype *ctype, char r) { assert(r == 'a' || r == 'c'); switch (ctype->size) { case 1: return (r == 'a') ? "al" : "cl"; case 4: return (r == 'a') ? "eax" : "ecx"; case 8: return (r == 'a') ? 
"rax" : "rcx"; default: error("Unknown data size: %s: %d", ctype_to_string(ctype), ctype->size); return 0; /* non-reachable */ } } static void push_xmm(int reg) { SAVE; emit("sub $8, %%rsp"); emit("movsd %%xmm%d, (%%rsp)", reg); stackpos += 8; } static void pop_xmm(int reg) { SAVE; emit("movsd (%%rsp), %%xmm%d", reg); emit("add $8, %%rsp"); stackpos -= 8; assert(stackpos >= 0); } static void push(char *reg) { SAVE; emit("push %%%s", reg); stackpos += 8; } static void pop(char *reg) { SAVE; emit("pop %%%s", reg); stackpos -= 8; assert(stackpos >= 0); } static void emit_gload(Ctype *ctype, char *label, int off) { SAVE; if (ctype->type == CTYPE_ARRAY) { if (off) emit("lea %s+%d(%%rip), %%rax", label, off); else emit("lea %s(%%rip), %%rax", label); return; } char *reg = get_int_reg(ctype, 'a'); if (ctype->size == 1) emit("mov $0, %%eax"); if (off) emit("mov %s+%d(%%rip), %%%s", label, off, reg); else emit("mov %s(%%rip), %%%s", label, reg); } static void emit_toint(Ctype *ctype) { SAVE; if (!is_flotype(ctype)) return; emit("cvttsd2si %%xmm0, %%eax"); } static void emit_todouble(Ctype *ctype) { SAVE; if (is_flotype(ctype)) return; emit("cvtsi2sd %%eax, %%xmm0"); } static void emit_lload(Ctype *ctype, int off) { SAVE; if (ctype->type == CTYPE_ARRAY) { emit("lea %d(%%rbp), %%rax", off); } else if (ctype->type == CTYPE_FLOAT) { emit("cvtps2pd %d(%%rbp), %%xmm0", off); } else if (ctype->type == CTYPE_DOUBLE) { emit("movsd %d(%%rbp), %%xmm0", off); } else { char *reg = get_int_reg(ctype, 'a'); if (ctype->size == 1) emit("mov $0, %%eax"); emit("mov %d(%%rbp), %%%s", off, reg); } } static void emit_gsave(char *varname, Ctype *ctype, int off) { SAVE; assert(ctype->type != CTYPE_ARRAY); char *reg = get_int_reg(ctype, 'a'); if (off) emit("mov %%%s, %s+%d(%%rip)", reg, varname, off); else emit("mov %%%s, %s(%%rip)", reg, varname); } static void emit_lsave(Ctype *ctype, int off) { SAVE; if (ctype->type == CTYPE_FLOAT) { emit("cvtpd2ps %%xmm0, %d(%%rbp)", off); } else if 
(ctype->type == CTYPE_DOUBLE) { emit("movsd %%xmm0, %d(%%rbp)", off); } else { char *reg = get_int_reg(ctype, 'a'); emit("mov %%%s, %d(%%rbp)", reg, off); } } static void emit_assign_deref_int(Ctype *ctype, int off) { SAVE; emit("mov (%%rsp), %%rcx"); char *reg = get_int_reg(ctype, 'c'); if (off) emit("mov %%%s, %d(%%rax)", reg, off); else emit("mov %%%s, (%%rax)", reg); pop("rax"); } static void emit_assign_deref(Ast *var) { SAVE; push("rax"); emit_expr(var->operand); emit_assign_deref_int(var->operand->ctype->ptr, 0); } static void emit_pointer_arith(char op UNUSED, Ast *left, Ast *right) { SAVE; emit_expr(left); push("rax"); emit_expr(right); int size = left->ctype->ptr->size; if (size > 1) emit("imul $%d, %%rax", size); emit("mov %%rax, %%rcx"); pop("rax"); emit("add %%rcx, %%rax"); } static void emit_assign_struct_ref(Ast *struc, Ctype *field, int off) { SAVE; switch (struc->type) { case AST_LVAR: emit_lsave(field, struc->loff + field->offset + off); break; case AST_GVAR: emit_gsave(struc->varname, field, field->offset + off); break; case AST_STRUCT_REF: emit_assign_struct_ref(struc->struc, field, off + struc->ctype->offset); break; case AST_DEREF: push("rax"); emit_expr(struc->operand); emit_assign_deref_int(field, field->offset + off); break; default: error("internal error: %s", ast_to_string(struc)); } } static void emit_load_struct_ref(Ast *struc, Ctype *field, int off) { SAVE; switch (struc->type) { case AST_LVAR: emit_lload(field, struc->loff + field->offset + off); break; case AST_GVAR: emit_gload(field, struc->varname, field->offset + off); break; case AST_STRUCT_REF: emit_load_struct_ref(struc->struc, field, struc->ctype->offset + off); break; case AST_DEREF: emit_expr(struc->operand); emit_load_deref(struc->ctype, field, field->offset + off); break; default: error("internal error: %s", ast_to_string(struc)); } } static void emit_assign(Ast *var) { SAVE; switch (var->type) { case AST_DEREF: emit_assign_deref(var); break; case AST_STRUCT_REF: 
emit_assign_struct_ref(var->struc, var->ctype, 0); break; case AST_LVAR: emit_lsave(var->ctype, var->loff); break; case AST_GVAR: emit_gsave(var->varname, var->ctype, 0); break; default: error("internal error"); } } static void emit_comp(char *inst, Ast *ast) { SAVE; if (is_flotype(ast->ctype)) { emit_expr(ast->left); emit_todouble(ast->left->ctype); push_xmm(0); emit_expr(ast->right); emit_todouble(ast->right->ctype); pop_xmm(1); emit("ucomisd %%xmm0, %%xmm1"); } else { emit_expr(ast->left); emit_toint(ast->left->ctype); push("rax"); emit_expr(ast->right); emit_toint(ast->right->ctype); pop("rcx"); emit("cmp %%rax, %%rcx"); } emit("%s %%al", inst); emit("movzb %%al, %%eax"); } static void emit_binop_int_arith(Ast *ast) { SAVE; char *op = NULL; switch (ast->type) { case '+': op = "add"; break; case '-': op = "sub"; break; case '*': op = "imul"; break; case '/': break; case PUNCT_LSHIFT: op = "sal"; break; case PUNCT_RSHIFT: op = "sar"; break; default: error("invalid operator '%d'", ast->type); } emit_expr(ast->left); emit_toint(ast->left->ctype); push("rax"); emit_expr(ast->right); emit_toint(ast->right->ctype); emit("mov %%rax, %%rcx"); pop("rax"); if (ast->type == '/') { emit("mov $0, %%edx"); emit("idiv %%rcx"); } else if (ast->type == PUNCT_LSHIFT || ast->type == PUNCT_RSHIFT) { emit("%s %%cl, %%rax", op); } else { emit("%s %%rcx, %%rax", op); } } static void emit_binop_float_arith(Ast *ast) { SAVE; char *op; switch (ast->type) { case '+': op = "addsd"; break; case '-': op = "subsd"; break; case '*': op = "mulsd"; break; case '/': op = "divsd"; break; default: error("invalid operator '%d'", ast->type); return; /* non-reachable */ } emit_expr(ast->left); emit_todouble(ast->left->ctype); push_xmm(0); emit_expr(ast->right); emit_todouble(ast->right->ctype); emit("movsd %%xmm0, %%xmm1"); pop_xmm(0); emit("%s %%xmm1, %%xmm0", op); } static void emit_binop(Ast *ast) { SAVE; if (ast->type == '=') { emit_expr(ast->right); if (is_flotype(ast->ctype)) 
emit_todouble(ast->right->ctype); else emit_toint(ast->right->ctype); emit_assign(ast->left); return; } if (ast->type == PUNCT_EQ) { emit_comp("sete", ast); return; } if (ast->ctype->type == CTYPE_PTR) { emit_pointer_arith(ast->type, ast->left, ast->right); return; } switch (ast->type) { case '<': emit_comp("setl", ast); return; case '>': emit_comp("setg", ast); return; } if (is_inttype(ast->ctype)) emit_binop_int_arith(ast); else if (is_flotype(ast->ctype)) emit_binop_float_arith(ast); else error("internal error"); } static void emit_inc_dec(Ast *ast, char *op) { SAVE; emit_expr(ast->operand); push("rax"); emit("%s $1, %%rax", op); emit_assign(ast->operand); pop("rax"); } static void emit_load_deref(Ctype *result_type, Ctype *operand_type, int off) { SAVE; if (operand_type->type == CTYPE_PTR && operand_type->ptr->type == CTYPE_ARRAY) return; char *reg = get_int_reg(result_type, 'c'); if (result_type->size == 1) emit("mov $0, %%ecx"); if (off) emit("mov %d(%%rax), %%%s", off, reg); else emit("mov (%%rax), %%%s", reg); emit("mov %%rcx, %%rax"); } static void emit_expr(Ast *ast) { SAVE; switch (ast->type) { case AST_LITERAL: switch (ast->ctype->type) { case CTYPE_CHAR: emit("mov $%d, %%rax", ast->ival); break; case CTYPE_INT: emit("mov $%d, %%eax", ast->ival); break; case CTYPE_LONG: emit("mov $%lu, %%rax", (unsigned long) ast->ival); break; case CTYPE_FLOAT: case CTYPE_DOUBLE: emit("movsd %s(%%rip), %%xmm0", ast->flabel); break; default: error("internal error"); } break; case AST_STRING: emit("lea %s(%%rip), %%rax", ast->slabel); break; case AST_LVAR: emit_lload(ast->ctype, ast->loff); break; case AST_GVAR: emit_gload(ast->ctype, ast->glabel, 0); break; case AST_FUNCALL: { int ireg = 0; int xreg = 0; for (Iter i = list_iter(ast->args); !iter_end(i);) { Ast *v = iter_next(&i); if (is_flotype(v->ctype)) push_xmm(xreg++); else push(REGS[ireg++]); } for (Iter i = list_iter(ast->args); !iter_end(i);) { Ast *v = iter_next(&i); emit_expr(v); if (is_flotype(v->ctype)) 
push_xmm(0); else push("rax"); } int ir = ireg; int xr = xreg; List *reverse = list_reverse(ast->args); for (Iter i = list_iter(reverse); !iter_end(i);) { Ast *v = iter_next(&i); if (is_flotype(v->ctype)) pop_xmm(--xr); else pop(REGS[--ir]); } emit("mov $%d, %%eax", xreg); if (stackpos % 16) emit("sub $8, %%rsp"); #ifdef __APPLE__ emit("call _%s", ast->fname); #else emit("call %s", ast->fname); #endif if (stackpos % 16) emit("add $8, %%rsp"); for (Iter i = list_iter(reverse); !iter_end(i);) { Ast *v = iter_next(&i); if (is_flotype(v->ctype)) pop_xmm(--xreg); else pop(REGS[--ireg]); } ListNode *node, *tmp; list_for_each_safe (node, tmp, reverse) free(node); free(reverse); break; } case AST_DECL: { if (!ast->declinit) return; if (ast->declinit->type == AST_ARRAY_INIT) { int off = 0; for (Iter iter = list_iter(ast->declinit->arrayinit); !iter_end(iter);) { emit_expr(iter_next(&iter)); emit_lsave(ast->declvar->ctype->ptr, ast->declvar->loff + off); off += ast->declvar->ctype->ptr->size; } } else if (ast->declvar->ctype->type == CTYPE_ARRAY) { assert(ast->declinit->type == AST_STRING); int i = 0; for (char *p = ast->declinit->sval; *p; p++, i++) emit("movb $%d, %d(%%rbp)", *p, ast->declvar->loff + i); emit("movb $0, %d(%%rbp)", ast->declvar->loff + i); } else if (ast->declinit->type == AST_STRING) { emit_gload(ast->declinit->ctype, ast->declinit->slabel, 0); emit_lsave(ast->declvar->ctype, ast->declvar->loff); } else { emit_expr(ast->declinit); emit_lsave(ast->declvar->ctype, ast->declvar->loff); } return; } case AST_ADDR: switch (ast->operand->type) { case AST_LVAR: emit("lea %d(%%rbp), %%rax", ast->operand->loff); break; case AST_GVAR: emit("lea %s(%%rip), %%rax", ast->operand->glabel); break; default: error("internal error"); } break; case AST_DEREF: emit_expr(ast->operand); emit_load_deref(ast->ctype, ast->operand->ctype, 0); break; case AST_IF: case AST_TERNARY: { emit_expr(ast->cond); char *ne = make_label(); emit("test %%rax, %%rax"); emit("je %s", ne); 
emit_expr(ast->then); if (ast->els) { char *end = make_label(); emit("jmp %s", end); emit("%s:", ne); emit_expr(ast->els); emit("%s:", end); } else { emit("%s:", ne); } break; } case AST_FOR: { if (ast->forinit) emit_expr(ast->forinit); char *begin = make_label(); char *end = make_label(); emit("%s:", begin); if (ast->forcond) { emit_expr(ast->forcond); emit("test %%rax, %%rax"); emit("je %s", end); } emit_expr(ast->forbody); if (ast->forstep) emit_expr(ast->forstep); emit("jmp %s", begin); emit("%s:", end); break; } case AST_RETURN: emit_expr(ast->retval); emit("leave"); emit("ret"); break; case AST_COMPOUND_STMT: for (Iter i = list_iter(ast->stmts); !iter_end(i);) { emit_expr(iter_next(&i)); emit("#;"); } break; case AST_STRUCT_REF: emit_load_struct_ref(ast->struc, ast->ctype, 0); break; case PUNCT_INC: emit_inc_dec(ast, "add"); break; case PUNCT_DEC: emit_inc_dec(ast, "sub"); break; case '!': emit_expr(ast->operand); emit("cmp $0, %%rax"); emit("sete %%al"); emit("movzb %%al, %%eax"); break; case '&': emit_expr(ast->left); push("rax"); emit_expr(ast->right); pop("rcx"); emit("and %%rcx, %%rax"); break; case '|': emit_expr(ast->left); push("rax"); emit_expr(ast->right); pop("rcx"); emit("or %%rcx, %%rax"); break; case PUNCT_LOGAND: { char *end = make_label(); emit_expr(ast->left); emit("test %%rax, %%rax"); emit("mov $0, %%rax"); emit("je %s", end); emit_expr(ast->right); emit("test %%rax, %%rax"); emit("mov $0, %%rax"); emit("je %s", end); emit("mov $1, %%rax"); emit("%s:", end); break; } case PUNCT_LOGOR: { char *end = make_label(); emit_expr(ast->left); emit("test %%rax, %%rax"); emit("mov $1, %%rax"); emit("jne %s", end); emit_expr(ast->right); emit("test %%rax, %%rax"); emit("mov $1, %%rax"); emit("jne %s", end); emit("mov $0, %%rax"); emit("%s:", end); break; } default: emit_binop(ast); } } static void emit_data_int(Ast *data) { SAVE; assert(data->ctype->type != CTYPE_ARRAY); switch (data->ctype->size) { case 1: emit(".byte %d", data->ival); break; case 4: 
emit(".long %d", data->ival); break; case 8: emit(".quad %d", data->ival); break; default: error("internal error"); } } static void emit_data(Ast *v) { SAVE; emit_label(".global %s", v->declvar->varname); emit_label("%s:", v->declvar->varname); if (v->declinit->type == AST_ARRAY_INIT) { for (Iter iter = list_iter(v->declinit->arrayinit); !iter_end(iter);) { emit_data_int(iter_next(&iter)); } return; } assert(v->declinit->type == AST_LITERAL && is_inttype(v->declinit->ctype)); emit_data_int(v->declinit); } static void emit_bss(Ast *v) { SAVE; emit(".lcomm %s, %d", v->declvar->varname, v->declvar->ctype->size); } static void emit_global_var(Ast *v) { SAVE; if (v->declinit) emit_data(v); else emit_bss(v); } void emit_data_section(void) { SAVE; emit(".data"); for (Iter i = list_iter(strings); !iter_end(i);) { Ast *v = iter_next(&i); emit_label("%s:", v->slabel); emit(".string \"%s\"", quote_cstring(v->sval)); } for (Iter i = list_iter(flonums); !iter_end(i);) { Ast *v = iter_next(&i); char *label = make_label(); v->flabel = label; emit_label("%s:", label); emit(".long %d", v->lval[0]); emit(".long %d", v->lval[1]); } } static int align(int n, int m) { int rem = n % m; return (rem == 0) ? 
n : n - rem + m; } static void emit_func_prologue(Ast *func) { SAVE; emit(".text"); #ifdef __APPLE__ emit_label(".global _%s", func->fname); emit_label("_%s:", func->fname); #else emit_label(".global %s", func->fname); emit_label("%s:", func->fname); #endif push("rbp"); emit("mov %%rsp, %%rbp"); int off = 0; int ireg = 0; int xreg = 0; for (Iter i = list_iter(func->params); !iter_end(i);) { Ast *v = iter_next(&i); if (v->ctype->type == CTYPE_FLOAT) { emit("cvtpd2ps %%xmm%d, %%xmm%d", xreg, xreg); push_xmm(xreg++); } else if (v->ctype->type == CTYPE_DOUBLE) { push_xmm(xreg++); } else { push(REGS[ireg++]); } off -= align(v->ctype->size, 8); v->loff = off; } for (Iter i = list_iter(func->localvars); !iter_end(i);) { Ast *v = iter_next(&i); off -= align(v->ctype->size, 8); v->loff = off; } if (off) emit("add $%d, %%rsp", off); stackpos += -(off - 8); } static void emit_func_epilogue(void) { SAVE; emit("leave"); emit("ret"); } void emit_toplevel(Ast *v) { stackpos = 0; if (v->type == AST_FUNC) { emit_func_prologue(v); emit_expr(v->body); emit_func_epilogue(); } else if (v->type == AST_DECL) { emit_global_var(v); } else { error("internal error"); } } ================================================ FILE: dict.h ================================================ #ifndef MAZUCC_DICT_H #define MAZUCC_DICT_H #include #include #include "list.h" typedef struct Dict { List *list; struct Dict *parent; } Dict; #define EMPTY_DICT ((Dict){&EMPTY_LIST, NULL}) typedef struct { char *key; void *val; } DictEntry; static inline void *make_dict(void *parent) { Dict *r = malloc(sizeof(Dict)); r->list = make_list(); r->parent = parent; return r; } static inline void *dict_get(Dict *dict, char *key) { for (; dict; dict = dict->parent) { for (Iter i = list_iter(dict->list); !iter_end(i);) { DictEntry *e = iter_next(&i); if (!strcmp(key, e->key)) return e->val; } } return NULL; } static inline void dict_put(Dict *dict, char *key, void *val) { DictEntry *e = malloc(sizeof(DictEntry)); e->key = 
key; e->val = val; list_push(dict->list, e); } static inline List *dict_keys(Dict *dict) { List *r = make_list(); for (; dict; dict = dict->parent) for (Iter i = list_iter(dict->list); !iter_end(i);) list_push(r, ((DictEntry *) iter_next(&i))->key); return r; } static inline List *dict_values(Dict *dict) { List *r = make_list(); for (; dict; dict = dict->parent) for (Iter i = list_iter(dict->list); !iter_end(i);) list_push(r, ((DictEntry *) iter_next(&i))->val); return r; } static inline void *dict_parent(Dict *dict) { void *r = dict->parent; list_free(dict->list); free(dict->list); free(dict); return r; } #endif /* MAZUCC_DICT_H */ ================================================ FILE: lexer.c ================================================ #include #include #include #include "mzcc.h" #define make_null(x) make_token(TTYPE_NULL, (uintptr_t) 0) #define make_strtok(x) make_token(TTYPE_STRING, (uintptr_t) get_cstring(x)) #define make_ident(x) make_token(TTYPE_IDENT, (uintptr_t) get_cstring(x)) #define make_punct(x) make_token(TTYPE_PUNCT, (uintptr_t)(x)) #define make_number(x) make_token(TTYPE_NUMBER, (uintptr_t)(x)) #define make_char(x) make_token(TTYPE_CHAR, (uintptr_t)(x)) static bool ungotten = false; static Token ungotten_buf = {0}; static Token make_token(enum TokenType type, uintptr_t data) { return (Token){ .type = type, .priv = data, }; } static int getc_nonspace(void) { int c; while ((c = getc(stdin)) != EOF) { if (isspace(c) || c == '\n' || c == '\r') continue; return c; } return EOF; } static Token read_number(char c) { String s = make_string(); string_append(&s, c); while (1) { int c = getc(stdin); if (!isdigit(c) && !isalpha(c) && c != '.') { ungetc(c, stdin); return make_number(get_cstring(s)); } string_append(&s, c); } } static Token read_char(void) { char c = getc(stdin); if (c == EOF) goto err; if (c == '\\') { c = getc(stdin); if (c == EOF) goto err; } char c2 = getc(stdin); if (c2 == EOF) goto err; if (c2 != '\'') error("Malformed char literal"); 
return make_char(c); err: error("Unterminated char"); return make_null(); /* non-reachable */ } static Token read_string(void) { String s = make_string(); while (1) { int c = getc(stdin); if (c == EOF) error("Unterminated string"); if (c == '"') break; if (c == '\\') { c = getc(stdin); switch (c) { case EOF: error("Unterminated \\"); case '\"': break; case 'n': c = '\n'; break; default: error("Unknown quote: %c", c); } } string_append(&s, c); } return make_strtok(s); } static Token read_ident(char c) { String s = make_string(); string_append(&s, c); while (1) { int c2 = getc(stdin); if (isalnum(c2) || c2 == '_') { string_append(&s, c2); } else { ungetc(c2, stdin); return make_ident(s); } } } static void skip_line_comment(void) { while (1) { int c = getc(stdin); if (c == '\n' || c == EOF) return; } } static void skip_block_comment(void) { enum { in_comment, asterisk_read } state = in_comment; while (1) { int c = getc(stdin); if (state == in_comment) { if (c == '*') state = asterisk_read; } else if (c == '/') { return; } } } static Token read_rep(int expect, int t1, int t2) { int c = getc(stdin); if (c == expect) return make_punct(t2); ungetc(c, stdin); return make_punct(t1); } static Token read_token_int(void) { int c = getc_nonspace(); switch (c) { case '0' ... '9': return read_number(c); case 'a' ... 'z': case 'A' ... 
'Z': case '_': return read_ident(c); case '/': { c = getc(stdin); if (c == '/') { skip_line_comment(); return read_token_int(); } if (c == '*') { skip_block_comment(); return read_token_int(); } ungetc(c, stdin); return make_punct('/'); } case '*': case '(': case ')': case ',': case ';': case '.': case '[': case ']': case '{': case '}': case '!': case '?': case ':': return make_punct(c); case '-': c = getc(stdin); if (c == '-') return make_punct(PUNCT_DEC); if (c == '>') return make_punct(PUNCT_ARROW); ungetc(c, stdin); return make_punct('-'); case '=': return read_rep('=', '=', PUNCT_EQ); case '+': return read_rep('+', '+', PUNCT_INC); case '&': return read_rep('&', '&', PUNCT_LOGAND); case '|': return read_rep('|', '|', PUNCT_LOGOR); case '<': return read_rep('<', '<', PUNCT_LSHIFT); case '>': return read_rep('>', '>', PUNCT_RSHIFT); case '"': return read_string(); case '\'': return read_char(); case EOF: return make_null(); default: error("Unexpected character: '%c'", c); return make_null(); /* non-reachable */ } } bool is_punct(const Token tok, int c) { return (get_ttype(tok) == TTYPE_PUNCT) && (get_punct(tok) == c); } void unget_token(const Token tok) { if (get_ttype(tok) == TTYPE_NULL) return; if (ungotten) error("Push back buffer is already full"); ungotten = true; ungotten_buf = make_token(tok.type, tok.priv); } Token peek_token(void) { Token tok = read_token(); unget_token(tok); return tok; } Token read_token(void) { if (ungotten) { ungotten = false; return make_token(ungotten_buf.type, ungotten_buf.priv); } return read_token_int(); } ================================================ FILE: list.h ================================================ #ifndef MAZUCC_LIST_H #define MAZUCC_LIST_H #include #include typedef struct __ListNode { void *elem; struct __ListNode *next, *prev; } ListNode; typedef struct { int len; ListNode *head, *tail; } List; typedef struct { ListNode *ptr; } Iter; #define EMPTY_LIST ((List){.len = 0, .head = NULL, .tail = NULL}) static 
inline List *make_list(void) { List *r = malloc(sizeof(List)); r->len = 0; r->head = r->tail = NULL; return r; } static inline void *make_node(void *elem) { ListNode *r = malloc(sizeof(ListNode)); r->elem = elem; r->next = NULL; r->prev = NULL; return r; } static inline void list_push(List *list, void *elem) { ListNode *node = make_node(elem); if (!list->head) { list->head = node; } else { list->tail->next = node; node->prev = list->tail; } list->tail = node; list->len++; } static inline void *list_pop(List *list) { if (!list->head) return NULL; ListNode *tail = list->tail; void *r = tail->elem; list->tail = tail->prev; if (list->tail) list->tail->next = NULL; else list->head = NULL; free(tail); return r; } static void list_unshift(List *list, void *elem) { ListNode *node = make_node(elem); node->next = list->head; if (list->head) list->head->prev = node; list->head = node; if (!list->tail) list->tail = node; list->len++; } static inline Iter list_iter(void *ptr) { return (Iter){ .ptr = ((List *) ptr)->head, }; } static inline bool iter_end(const Iter iter) { return !iter.ptr; } static inline void *iter_next(Iter *iter) { if (!iter->ptr) return NULL; void *r = iter->ptr->elem; iter->ptr = iter->ptr->next; return r; } static inline List *list_reverse(List *list) { List *r = make_list(); for (Iter i = list_iter(list); !iter_end(i);) list_unshift(r, iter_next(&i)); return r; } static inline int list_len(List *list) { return list->len; } #define list_safe_next(node) ((node) ? 
(node)->next : NULL) #define list_for_each_safe(node, tmp, list) \ for ((node) = (list)->head, (tmp) = list_safe_next(node); (node); \ (node) = (tmp), (tmp) = list_safe_next(node)) static inline void list_free(List *list) { ListNode *node, *tmp; list_for_each_safe (node, tmp, list) { free(node->elem); free(node); } } #endif /* MAZUCC_LIST_H */ ================================================ FILE: main.c ================================================ #include #include #include #include "mzcc.h" static char *outfile = NULL, *infile = NULL; extern FILE *outfp; static bool dump_ast; static void usage() { fprintf(stdout, "mzcc [options] filename\n" "OPTIONS\n" " -o filename Write output to the specified file.\n" " --dump-ast Dump abstract syntax tree(AST)\n"); } static void print_usage_and_exit() { usage(); exit(1); } static void parse_args(int argc, char **argv) { if (argc < 2) { print_usage_and_exit(); } while (true) { argc--; argv++; if (!argc) { break; } if ((*argv)[0] == '-') { switch ((*argv)[1]) { case '\0': infile = "/dev/stdin"; break; case 'o': argc--; argv++; outfile = *argv; break; case '-': if (!strcmp(*argv, "--dump-ast")) { dump_ast = true; break; } default: print_usage_and_exit(); } } else { if (infile) { // The second non-option argument is not what we expect. 
print_usage_and_exit(); } infile = argv[0]; } } } static void open_output_file() { if (outfile) { if (!(outfp = fopen(outfile, "w"))) { printf("Can not open file %s\n", outfile); exit(1); } } else { outfp = stdout; } } static void open_input_file() { if (!infile) { printf("Input file is not specified\n\n"); print_usage_and_exit(); } if (!freopen(infile, "r", stdin)) { printf("Can not open file %s\n", infile); exit(1); } } int main(int argc, char **argv) { parse_args(argc, argv); open_input_file(); open_output_file(); List *toplevels = read_toplevels(); if (!dump_ast) emit_data_section(); for (Iter i = list_iter(toplevels); !iter_end(i);) { Ast *v = iter_next(&i); if (dump_ast) printf("%s", ast_to_string(v)); else emit_toplevel(v); } list_free(cstrings); list_free(ctypes); return 0; } ================================================ FILE: mzcc.h ================================================ #ifndef MAZUCC_H #define MAZUCC_H #include #include #include "dict.h" #include "list.h" #include "util.h" enum TokenType { TTYPE_NULL, TTYPE_IDENT, TTYPE_PUNCT, TTYPE_NUMBER, TTYPE_CHAR, TTYPE_STRING, }; typedef struct { int type; uintptr_t priv; } Token; enum { AST_LITERAL = 256, AST_STRING, AST_LVAR, AST_GVAR, AST_FUNCALL, AST_FUNC, AST_DECL, AST_ARRAY_INIT, AST_ADDR, AST_DEREF, AST_IF, AST_TERNARY, AST_FOR, AST_RETURN, AST_COMPOUND_STMT, AST_STRUCT_REF, PUNCT_EQ, PUNCT_INC, PUNCT_DEC, PUNCT_LOGAND, PUNCT_LOGOR, PUNCT_ARROW, PUNCT_LSHIFT, PUNCT_RSHIFT, }; enum { CTYPE_VOID, CTYPE_CHAR, CTYPE_INT, CTYPE_LONG, CTYPE_FLOAT, CTYPE_DOUBLE, CTYPE_ARRAY, CTYPE_PTR, CTYPE_STRUCT, }; typedef struct __Ctype { int type; int size; struct __Ctype *ptr; /* pointer or array */ int len; /* array length */ /* struct */ Dict *fields; int offset; } Ctype; typedef struct __Ast { int type; Ctype *ctype; union { /* char, int, or long */ long ival; /* float or double */ struct { union { double fval; int lval[2]; }; char *flabel; }; /* string literal */ struct { char *sval; char *slabel; }; /* 
Local/global variable */ struct { char *varname; struct { int loff; char *glabel; }; }; /* Binary operator */ struct { struct __Ast *left; struct __Ast *right; }; /* Unary operator */ struct { struct __Ast *operand; }; /* Function call or function declaration */ struct { char *fname; struct { List *args; struct { List *params; List *localvars; struct __Ast *body; }; }; }; /* Declaration */ struct { struct __Ast *declvar; struct __Ast *declinit; }; /* Array initializer */ List *arrayinit; /* if statement or ternary operator */ struct { struct __Ast *cond; struct __Ast *then; struct __Ast *els; }; /* for statement */ struct { struct __Ast *forinit; struct __Ast *forcond; struct __Ast *forstep; struct __Ast *forbody; }; /* return statement */ struct __Ast *retval; /* Compound statement */ List *stmts; /* Struct reference */ struct { struct __Ast *struc; char *field; /* specific to ast_to_string only */ }; }; } Ast; /* verbose.c */ extern char *token_to_string(const Token tok); extern char *ast_to_string(Ast *ast); extern char *ctype_to_string(Ctype *ctype); /* lexer.c */ extern bool is_punct(const Token tok, int c); extern void unget_token(const Token tok); extern Token peek_token(void); extern Token read_token(void); #define get_priv(tok, type) \ ({ \ assert(__builtin_types_compatible_p(typeof(tok), Token)); \ ((type) tok.priv); \ }) #define get_ttype(tok) \ ({ \ assert(__builtin_types_compatible_p(typeof(tok), Token)); \ (tok.type); \ }) #define get_token(tok, ttype, priv_type) \ ({ \ assert(get_ttype(tok) == ttype); \ get_priv(tok, priv_type); \ }) #define get_char(tok) get_token(tok, TTYPE_CHAR, char) #define get_strtok(tok) get_token(tok, TTYPE_STRING, char *) #define get_ident(tok) get_token(tok, TTYPE_IDENT, char *) #define get_number(tok) get_token(tok, TTYPE_NUMBER, char *) #define get_punct(tok) get_token(tok, TTYPE_PUNCT, int) /* parser.c */ extern List *strings; extern List *flonums; extern List *ctypes; extern char *make_label(void); extern List 
*read_toplevels(void); extern bool is_inttype(Ctype *ctype); extern bool is_flotype(Ctype *ctype); /* codegen_x64.c */ extern void emit_data_section(void); extern void emit_toplevel(Ast *v); #endif /* MAZUCC_H */ ================================================ FILE: parser.c ================================================ #include #include #include #include #include #include #include "mzcc.h" #define MAX_ARGS 6 #define MAX_OP_PRIO 16 #define MAX_ALIGN 16 List *ctypes = &EMPTY_LIST; List *strings = &EMPTY_LIST; List *flonums = &EMPTY_LIST; static Dict *globalenv = &EMPTY_DICT; static Dict *localenv = NULL; static Dict *struct_defs = &EMPTY_DICT; static Dict *union_defs = &EMPTY_DICT; static List *localvars = NULL; static Ctype *ctype_void = &(Ctype){CTYPE_VOID, 0, NULL}; static Ctype *ctype_int = &(Ctype){CTYPE_INT, 4, NULL}; static Ctype *ctype_long = &(Ctype){CTYPE_LONG, 8, NULL}; static Ctype *ctype_char = &(Ctype){CTYPE_CHAR, 1, NULL}; static Ctype *ctype_float = &(Ctype){CTYPE_FLOAT, 4, NULL}; static Ctype *ctype_double = &(Ctype){CTYPE_DOUBLE, 8, NULL}; static int labelseq = 0; static Ast *read_expr(void); static Ctype *make_ptr_type(Ctype *ctype); static Ctype *make_array_type(Ctype *ctype, int size); static Ast *read_compound_stmt(void); static Ast *read_decl_or_stmt(void); static Ctype *result_type(char op, Ctype *a, Ctype *b); static Ctype *convert_array(Ctype *ctype); static Ast *read_stmt(void); static Ctype *read_decl_int(Token *name); static Ast *ast_uop(int type, Ctype *ctype, Ast *operand) { Ast *r = malloc(sizeof(Ast)); r->type = type; r->ctype = ctype; r->operand = operand; return r; } static Ast *ast_binop(int type, Ast *left, Ast *right) { Ast *r = malloc(sizeof(Ast)); r->type = type; r->ctype = result_type(type, left->ctype, right->ctype); if (type != '=' && convert_array(left->ctype)->type != CTYPE_PTR && convert_array(right->ctype)->type == CTYPE_PTR) { r->left = right; r->right = left; } else { r->left = left; r->right = right; } return r; 
} static Ast *ast_inttype(Ctype *ctype, long val) { Ast *r = malloc(sizeof(Ast)); r->type = AST_LITERAL; r->ctype = ctype; r->ival = val; return r; } static Ast *ast_double(double val) { Ast *r = malloc(sizeof(Ast)); r->type = AST_LITERAL; r->ctype = ctype_double; r->fval = val; list_push(flonums, r); return r; } char *make_label(void) { String s = make_string(); string_appendf(&s, ".L%d", labelseq++); return get_cstring(s); } static Ast *ast_lvar(Ctype *ctype, char *name) { Ast *r = malloc(sizeof(Ast)); r->type = AST_LVAR; r->ctype = ctype; r->varname = name; dict_put(localenv, name, r); if (localvars) list_push(localvars, r); return r; } static Ast *ast_gvar(Ctype *ctype, char *name, bool filelocal) { Ast *r = malloc(sizeof(Ast)); r->type = AST_GVAR; r->ctype = ctype; r->varname = name; r->glabel = filelocal ? make_label() : name; dict_put(globalenv, name, r); return r; } static Ast *ast_string(char *str) { Ast *r = malloc(sizeof(Ast)); r->type = AST_STRING; r->ctype = make_array_type(ctype_char, strlen(str) + 1); r->sval = str; r->slabel = make_label(); return r; } static Ast *ast_funcall(Ctype *ctype, char *fname, List *args) { Ast *r = malloc(sizeof(Ast)); r->type = AST_FUNCALL; r->ctype = ctype; r->fname = fname; r->args = args; return r; } static Ast *ast_func(Ctype *rettype, char *fname, List *params, Ast *body, List *localvars) { Ast *r = malloc(sizeof(Ast)); r->type = AST_FUNC; r->ctype = rettype; r->fname = fname; r->params = params; r->localvars = localvars; r->body = body; return r; } static Ast *ast_decl(Ast *var, Ast *init) { Ast *r = malloc(sizeof(Ast)); r->type = AST_DECL; r->ctype = NULL; r->declvar = var; r->declinit = init; return r; } static Ast *ast_array_init(List *arrayinit) { Ast *r = malloc(sizeof(Ast)); r->type = AST_ARRAY_INIT; r->ctype = NULL; r->arrayinit = arrayinit; return r; } static Ast *ast_if(Ast *cond, Ast *then, Ast *els) { Ast *r = malloc(sizeof(Ast)); r->type = AST_IF; r->ctype = NULL; r->cond = cond; r->then = then; r->els = 
els; return r; } static Ast *ast_ternary(Ctype *ctype, Ast *cond, Ast *then, Ast *els) { Ast *r = malloc(sizeof(Ast)); r->type = AST_TERNARY; r->ctype = ctype; r->cond = cond; r->then = then; r->els = els; return r; } static Ast *ast_for(Ast *init, Ast *cond, Ast *step, Ast *body) { Ast *r = malloc(sizeof(Ast)); r->type = AST_FOR; r->ctype = NULL; r->forinit = init; r->forcond = cond; r->forstep = step; r->forbody = body; return r; } static Ast *ast_return(Ast *retval) { Ast *r = malloc(sizeof(Ast)); r->type = AST_RETURN; r->ctype = NULL; r->retval = retval; return r; } static Ast *ast_compound_stmt(List *stmts) { Ast *r = malloc(sizeof(Ast)); r->type = AST_COMPOUND_STMT; r->ctype = NULL; r->stmts = stmts; return r; } static Ast *ast_struct_ref(Ctype *ctype, Ast *struc, char *name) { Ast *r = malloc(sizeof(Ast)); r->type = AST_STRUCT_REF; r->ctype = ctype; r->struc = struc; r->field = name; return r; } static Ctype *make_ptr_type(Ctype *ctype) { Ctype *r = malloc(sizeof(Ctype)); r->type = CTYPE_PTR; r->ptr = ctype; r->size = 8; list_push(ctypes, r); return r; } static Ctype *make_array_type(Ctype *ctype, int len) { Ctype *r = malloc(sizeof(Ctype)); r->type = CTYPE_ARRAY; r->ptr = ctype; r->size = (len < 0) ? 
-1 : ctype->size * len; r->len = len; list_push(ctypes, r); return r; } static Ctype *make_struct_field_type(Ctype *ctype, int offset) { Ctype *r = malloc(sizeof(Ctype)); memcpy(r, ctype, sizeof(Ctype)); r->offset = offset; list_push(ctypes, r); return r; } static Ctype *make_struct_type(Dict *fields, int size) { Ctype *r = malloc(sizeof(Ctype)); r->type = CTYPE_STRUCT; r->fields = fields; r->size = size; list_push(ctypes, r); return r; } bool is_inttype(Ctype *ctype) { return ctype->type == CTYPE_CHAR || ctype->type == CTYPE_INT || ctype->type == CTYPE_LONG; } bool is_flotype(Ctype *ctype) { return ctype->type == CTYPE_FLOAT || ctype->type == CTYPE_DOUBLE; } static void ensure_lvalue(Ast *ast) { switch (ast->type) { case AST_LVAR: case AST_GVAR: case AST_DEREF: case AST_STRUCT_REF: return; default: error("lvalue expected, but got %s", ast_to_string(ast)); } } static void expect(char punct) { Token tok = read_token(); if (!is_punct(tok, punct)) error("'%c' expected, but got %s", punct, token_to_string(tok)); } static bool is_ident(const Token tok, char *s) { return get_ttype(tok) == TTYPE_IDENT && !strcmp(get_ident(tok), s); } static bool is_right_assoc(const Token tok) { return get_punct(tok) == '='; } static int eval_intexpr(Ast *ast) { switch (ast->type) { case AST_LITERAL: if (is_inttype(ast->ctype)) return ast->ival; error("Integer expression expected, but got %s", ast_to_string(ast)); case '+': return eval_intexpr(ast->left) + eval_intexpr(ast->right); case '-': return eval_intexpr(ast->left) - eval_intexpr(ast->right); case '*': return eval_intexpr(ast->left) * eval_intexpr(ast->right); case '/': return eval_intexpr(ast->left) / eval_intexpr(ast->right); case PUNCT_LSHIFT: return eval_intexpr(ast->left) << eval_intexpr(ast->right); case PUNCT_RSHIFT: return eval_intexpr(ast->left) >> eval_intexpr(ast->right); default: error("Integer expression expected, but got %s", ast_to_string(ast)); return 0; /* non-reachable */ } } static int priority(const Token tok) { 
switch (get_punct(tok)) { case '[': case '.': case PUNCT_ARROW: return 1; case PUNCT_INC: case PUNCT_DEC: return 2; case '*': case '/': return 3; case '+': case '-': return 4; case PUNCT_LSHIFT: case PUNCT_RSHIFT: return 5; case '<': case '>': return 6; case '&': return 8; case '|': return 10; case PUNCT_EQ: return 7; case PUNCT_LOGAND: return 11; case PUNCT_LOGOR: return 12; case '?': return 13; case '=': return 14; default: return -1; } } static Ast *read_func_args(char *fname) { List *args = make_list(); while (1) { Token tok = read_token(); if (is_punct(tok, ')')) break; unget_token(tok); list_push(args, read_expr()); tok = read_token(); if (is_punct(tok, ')')) break; if (!is_punct(tok, ',')) error("Unexpected token: '%s'", token_to_string(tok)); } if (MAX_ARGS < list_len(args)) error("Too many arguments: %s", fname); return ast_funcall(ctype_int, fname, args); } static Ast *read_ident_or_func(char *name) { Token tok = read_token(); if (is_punct(tok, '(')) return read_func_args(name); unget_token(tok); Ast *v = dict_get(localenv, name); if (!v) error("Undefined varaible: %s", name); return v; } static bool is_long_token(char *p) { for (; *p; p++) { if (!isdigit(*p)) return (*p == 'L' || *p == 'l') && p[1] == '\0'; } return false; } static bool is_int_token(char *p) { for (; *p; p++) if (!isdigit(*p)) return false; return true; } static bool is_float_token(char *p) { for (; *p; p++) if (!isdigit(*p)) break; if (*p++ != '.') return false; for (; *p; p++) if (!isdigit(*p)) return false; return true; } static Ast *read_prim(void) { Token tok = read_token(); switch (get_ttype(tok)) { case TTYPE_NULL: return NULL; case TTYPE_IDENT: return read_ident_or_func(get_ident(tok)); case TTYPE_NUMBER: { char *number = get_number(tok); if (is_long_token(number)) return ast_inttype(ctype_long, atol(number)); if (is_int_token(number)) { long val = atol(number); if (val & ~(long) UINT_MAX) return ast_inttype(ctype_long, val); return ast_inttype(ctype_int, val); } if 
(is_float_token(number)) return ast_double(atof(number)); error("Malformed number: %s", token_to_string(tok)); } case TTYPE_CHAR: return ast_inttype(ctype_char, get_char(tok)); case TTYPE_STRING: { Ast *r = ast_string(get_strtok(tok)); list_push(strings, r); return r; } case TTYPE_PUNCT: unget_token(tok); return NULL; default: error("internal error: unknown token type: %d", get_ttype(tok)); return NULL; /* non-reachable */ } } #define swap(a, b) \ { \ typeof(a) tmp = b; \ b = a; \ a = tmp; \ } static Ctype *result_type_int(jmp_buf *jmpbuf, char op, Ctype *a, Ctype *b) { if (a->type > b->type) swap(a, b); if (b->type == CTYPE_PTR) { if (op == '=') return a; if (op != '+' && op != '-') goto err; if (!is_inttype(a)) goto err; return b; } switch (a->type) { case CTYPE_VOID: goto err; case CTYPE_CHAR: case CTYPE_INT: switch (b->type) { case CTYPE_CHAR: case CTYPE_INT: return ctype_int; case CTYPE_LONG: return ctype_long; case CTYPE_FLOAT: case CTYPE_DOUBLE: return ctype_double; case CTYPE_ARRAY: case CTYPE_PTR: return b; } error("internal error"); case CTYPE_LONG: switch (b->type) { case CTYPE_LONG: return ctype_long; case CTYPE_FLOAT: case CTYPE_DOUBLE: return ctype_double; case CTYPE_ARRAY: case CTYPE_PTR: return b; } error("internal error"); case CTYPE_FLOAT: if (b->type == CTYPE_FLOAT || b->type == CTYPE_DOUBLE) return ctype_double; goto err; case CTYPE_DOUBLE: if (b->type == CTYPE_DOUBLE) return ctype_double; goto err; case CTYPE_ARRAY: if (b->type != CTYPE_ARRAY) goto err; return result_type_int(jmpbuf, op, a->ptr, b->ptr); default: error("internal error: %s %s", ctype_to_string(a), ctype_to_string(b)); } err: longjmp(*jmpbuf, 1); } static Ast *read_subscript_expr(Ast *ast) { Ast *sub = read_expr(); expect(']'); Ast *t = ast_binop('+', ast, sub); return ast_uop(AST_DEREF, t->ctype->ptr, t); } static Ctype *convert_array(Ctype *ctype) { if (ctype->type != CTYPE_ARRAY) return ctype; return make_ptr_type(ctype->ptr); } static Ctype *result_type(char op, Ctype *a, 
Ctype *b) { jmp_buf jmpbuf; if (setjmp(jmpbuf) == 0) return result_type_int(&jmpbuf, op, convert_array(a), convert_array(b)); error("incompatible operands: %c: <%s> and <%s>", op, ctype_to_string(a), ctype_to_string(b)); return NULL; /* non-reachable */ } static Ast *read_unary_expr(void) { Token tok = read_token(); if (get_ttype(tok) != TTYPE_PUNCT) { unget_token(tok); return read_prim(); } if (is_punct(tok, '(')) { Ast *r = read_expr(); expect(')'); return r; } if (is_punct(tok, '&')) { Ast *operand = read_unary_expr(); ensure_lvalue(operand); return ast_uop(AST_ADDR, make_ptr_type(operand->ctype), operand); } if (is_punct(tok, '!')) { Ast *operand = read_unary_expr(); return ast_uop('!', ctype_int, operand); } if (is_punct(tok, '*')) { Ast *operand = read_unary_expr(); Ctype *ctype = convert_array(operand->ctype); if (ctype->type != CTYPE_PTR) error("pointer type expected, but got %s", ast_to_string(operand)); if (ctype->ptr == ctype_void) error("pointer to void can not be dereferenced, but got %s", ast_to_string(operand)); return ast_uop(AST_DEREF, operand->ctype->ptr, operand); } unget_token(tok); return read_prim(); } static Ast *read_cond_expr(Ast *cond) { Ast *then = read_expr(); expect(':'); Ast *els = read_expr(); return ast_ternary(then->ctype, cond, then, els); } static Ast *read_struct_field(Ast *struc) { if (struc->ctype->type != CTYPE_STRUCT) error("struct expected, but got %s", ast_to_string(struc)); Token name = read_token(); if (get_ttype(name) != TTYPE_IDENT) error("field name expected, but got %s", token_to_string(name)); char *ident = get_ident(name); Ctype *field = dict_get(struc->ctype->fields, ident); return ast_struct_ref(field, struc, ident); } static Ast *read_expr_int(int prec) { Ast *ast = read_unary_expr(); if (!ast) return NULL; while (1) { Token tok = read_token(); if (get_ttype(tok) != TTYPE_PUNCT) { unget_token(tok); return ast; } int prec2 = priority(tok); if (prec2 < 0 || prec <= prec2) { unget_token(tok); return ast; } if 
(is_punct(tok, '?')) { ast = read_cond_expr(ast); continue; } if (is_punct(tok, '.')) { ast = read_struct_field(ast); continue; } if (is_punct(tok, PUNCT_ARROW)) { if (ast->ctype->type != CTYPE_PTR) error("pointer type expected, but got %s %s", ctype_to_string(ast->ctype), ast_to_string(ast)); ast = ast_uop(AST_DEREF, ast->ctype->ptr, ast); ast = read_struct_field(ast); continue; } if (is_punct(tok, '[')) { ast = read_subscript_expr(ast); continue; } // this is BUG!! ++ should be in read_unary_expr() , I think. if (is_punct(tok, PUNCT_INC) || is_punct(tok, PUNCT_DEC)) { ensure_lvalue(ast); ast = ast_uop(get_punct(tok), ast->ctype, ast); continue; } if (is_punct(tok, '=')) ensure_lvalue(ast); Ast *rest = read_expr_int(prec2 + (is_right_assoc(tok) ? 1 : 0)); if (!rest) error("second operand missing"); if (is_punct(tok, PUNCT_LSHIFT) || is_punct(tok, PUNCT_RSHIFT)) { if ((ast->ctype != ctype_int && ast->ctype != ctype_char) || (rest->ctype != ctype_int && rest->ctype != ctype_char)) error("invalid operand to shift"); } ast = ast_binop(get_punct(tok), ast, rest); } } static Ast *read_expr(void) { return read_expr_int(MAX_OP_PRIO); } static Ctype *get_ctype(const Token tok) { if (get_ttype(tok) != TTYPE_IDENT) return NULL; char *ident = get_ident(tok); if (!strcmp(ident, "void")) return ctype_void; if (!strcmp(ident, "int")) return ctype_int; if (!strcmp(ident, "long")) return ctype_long; if (!strcmp(ident, "char")) return ctype_char; if (!strcmp(ident, "float")) return ctype_float; if (!strcmp(ident, "double")) return ctype_double; return NULL; } static bool is_type_keyword(const Token tok) { return get_ctype(tok) || is_ident(tok, "struct") || is_ident(tok, "union"); } static Ast *read_decl_array_init_int(Ctype *ctype) { Token tok = read_token(); if (ctype->ptr->type == CTYPE_CHAR && get_ttype(tok) == TTYPE_STRING) return ast_string(get_strtok(tok)); if (!is_punct(tok, '{')) error("Expected an initializer list for %s, but got %s", ctype_to_string(ctype), 
token_to_string(tok)); List *initlist = make_list(); while (1) { Token tok = read_token(); if (is_punct(tok, '}')) break; unget_token(tok); Ast *init = read_expr(); list_push(initlist, init); result_type('=', init->ctype, ctype->ptr); tok = read_token(); if (!is_punct(tok, ',')) unget_token(tok); } return ast_array_init(initlist); } static char *read_struct_union_tag(void) { Token tok = read_token(); if (get_ttype(tok) == TTYPE_IDENT) return get_ident(tok); unget_token(tok); return NULL; } static Dict *read_struct_union_fields(void) { Dict *r = make_dict(NULL); expect('{'); while (1) { if (!is_type_keyword(peek_token())) break; Token name; Ctype *fieldtype = read_decl_int(&name); dict_put(r, get_ident(name), make_struct_field_type(fieldtype, 0)); expect(';'); } expect('}'); return r; } static Ctype *read_union_def(void) { char *tag = read_struct_union_tag(); Ctype *ctype = dict_get(union_defs, tag); if (ctype) return ctype; Dict *fields = read_struct_union_fields(); int maxsize = 0; for (Iter i = list_iter(dict_values(fields)); !iter_end(i);) { Ctype *fieldtype = iter_next(&i); maxsize = (maxsize < fieldtype->size) ? fieldtype->size : maxsize; } Ctype *r = make_struct_type(fields, maxsize); if (tag) dict_put(union_defs, tag, r); return r; } static Ctype *read_struct_def(void) { char *tag = read_struct_union_tag(); Ctype *ctype = dict_get(struct_defs, tag); if (ctype) return ctype; Dict *fields = read_struct_union_fields(); int offset = 0; for (Iter i = list_iter(dict_values(fields)); !iter_end(i);) { Ctype *fieldtype = iter_next(&i); int size = (fieldtype->size < MAX_ALIGN) ? fieldtype->size : MAX_ALIGN; if (offset % size != 0) offset += size - offset % size; fieldtype->offset = offset; offset += fieldtype->size; } Ctype *r = make_struct_type(fields, offset); if (tag) dict_put(struct_defs, tag, r); return r; } static Ctype *read_decl_spec(void) { Token tok = read_token(); Ctype *ctype = is_ident(tok, "struct") ? read_struct_def() : is_ident(tok, "union") ? 
read_union_def() : get_ctype(tok); if (!ctype) error("Type expected, but got %s", token_to_string(tok)); while (1) { tok = read_token(); if (!is_punct(tok, '*')) { unget_token(tok); return ctype; } ctype = make_ptr_type(ctype); } } static Ast *read_decl_init_val(Ast *var) { if (var->ctype->type == CTYPE_ARRAY) { Ast *init = read_decl_array_init_int(var->ctype); int len = (init->type == AST_STRING) ? strlen(init->sval) + 1 : list_len(init->arrayinit); if (var->ctype->len == -1) { var->ctype->len = len; var->ctype->size = len * var->ctype->ptr->size; } else if (var->ctype->len != len) { error("Invalid array initializer: expected %d items but got %d", var->ctype->len, len); } expect(';'); return ast_decl(var, init); } Ast *init = read_expr(); expect(';'); if (var->type == AST_GVAR) init = ast_inttype(ctype_int, eval_intexpr(init)); return ast_decl(var, init); } static Ctype *read_array_dimensions_int(Ctype *basetype) { Token tok = read_token(); if (!is_punct(tok, '[')) { unget_token(tok); return NULL; } int dim = -1; if (!is_punct(peek_token(), ']')) { Ast *size = read_expr(); dim = eval_intexpr(size); } expect(']'); Ctype *sub = read_array_dimensions_int(basetype); if (sub) { if (sub->len == -1 && dim == -1) error("Array size is not specified"); return make_array_type(sub, dim); } return make_array_type(basetype, dim); } static Ctype *read_array_dimensions(Ctype *basetype) { Ctype *ctype = read_array_dimensions_int(basetype); return ctype ? 
ctype : basetype; } static Ast *read_decl_init(Ast *var) { Token tok = read_token(); if (is_punct(tok, '=')) return read_decl_init_val(var); if (var->ctype->len == -1) error("Missing array initializer"); unget_token(tok); expect(';'); return ast_decl(var, NULL); } static Ctype *read_decl_int(Token *name) { Ctype *ctype = read_decl_spec(); *name = read_token(); if (get_ttype((*name)) != TTYPE_IDENT) error("Identifier expected, but got %s", token_to_string(*name)); return read_array_dimensions(ctype); } static Ast *read_decl(void) { Token varname; Ctype *ctype = read_decl_int(&varname); if (ctype == ctype_void) error("Storage size of '%s' is not known", token_to_string(varname)); Ast *var = ast_lvar(ctype, get_ident(varname)); return read_decl_init(var); } static Ast *read_if_stmt(void) { expect('('); Ast *cond = read_expr(); expect(')'); Ast *then = read_stmt(); Token tok = read_token(); if (get_ttype(tok) != TTYPE_IDENT || strcmp(get_ident(tok), "else")) { unget_token(tok); return ast_if(cond, then, NULL); } Ast *els = read_stmt(); return ast_if(cond, then, els); } static Ast *read_opt_decl_or_stmt(void) { Token tok = read_token(); if (is_punct(tok, ';')) return NULL; unget_token(tok); return read_decl_or_stmt(); } static Ast *read_opt_expr(void) { Token tok = read_token(); if (is_punct(tok, ';')) return NULL; unget_token(tok); Ast *r = read_expr(); expect(';'); return r; } static Ast *read_for_stmt(void) { expect('('); localenv = make_dict(localenv); Ast *init = read_opt_decl_or_stmt(); Ast *cond = read_opt_expr(); Ast *step = is_punct(peek_token(), ')') ? 
NULL : read_expr(); expect(')'); Ast *body = read_stmt(); localenv = dict_parent(localenv); return ast_for(init, cond, step, body); } static Ast *read_return_stmt(void) { Ast *retval = read_expr(); expect(';'); return ast_return(retval); } static Ast *read_stmt(void) { Token tok = read_token(); if (is_ident(tok, "if")) return read_if_stmt(); if (is_ident(tok, "for")) return read_for_stmt(); if (is_ident(tok, "return")) return read_return_stmt(); if (is_punct(tok, '{')) return read_compound_stmt(); unget_token(tok); Ast *r = read_expr(); expect(';'); return r; } static Ast *read_decl_or_stmt(void) { Token tok = peek_token(); if (get_ttype(tok) == TTYPE_NULL) return NULL; return is_type_keyword(tok) ? read_decl() : read_stmt(); } static Ast *read_compound_stmt(void) { localenv = make_dict(localenv); List *list = make_list(); while (1) { Ast *stmt = read_decl_or_stmt(); if (stmt) list_push(list, stmt); if (!stmt) break; Token tok = read_token(); if (is_punct(tok, '}')) break; unget_token(tok); } localenv = dict_parent(localenv); return ast_compound_stmt(list); } static List *read_params(void) { List *params = make_list(); Token tok = read_token(); if (is_punct(tok, ')')) return params; unget_token(tok); while (1) { Ctype *ctype = read_decl_spec(); Token pname = read_token(); if (get_ttype(pname) != TTYPE_IDENT) error("Identifier expected, but got %s", token_to_string(pname)); ctype = read_array_dimensions(ctype); if (ctype->type == CTYPE_ARRAY) ctype = make_ptr_type(ctype->ptr); list_push(params, ast_lvar(ctype, get_ident(pname))); Token tok = read_token(); if (is_punct(tok, ')')) return params; if (!is_punct(tok, ',')) error("Comma expected, but got %s", token_to_string(tok)); } } static Ast *read_func_def(Ctype *rettype, char *fname) { expect('('); localenv = make_dict(globalenv); List *params = read_params(); expect('{'); localenv = make_dict(localenv); localvars = make_list(); Ast *body = read_compound_stmt(); Ast *r = ast_func(rettype, fname, params, body, 
localvars); localenv = dict_parent(localenv); localenv = dict_parent(localenv); localvars = NULL; return r; } static Ast *read_decl_or_func_def(void) { Token tok = peek_token(); if (get_ttype(tok) == TTYPE_NULL) return NULL; Ctype *ctype = read_decl_spec(); Token name = read_token(); char *ident; if (get_ttype(name) != TTYPE_IDENT) error("Identifier expected, but got %s", token_to_string(name)); ident = get_ident(name); tok = peek_token(); if (is_punct(tok, '(')) return read_func_def(ctype, ident); if (ctype == ctype_void) error("Storage size of '%s' is not known", token_to_string(name)); ctype = read_array_dimensions(ctype); if (is_punct(tok, '=') || ctype->type == CTYPE_ARRAY) { Ast *var = ast_gvar(ctype, ident, false); return read_decl_init(var); } if (is_punct(tok, ';')) { read_token(); Ast *var = ast_gvar(ctype, ident, false); return ast_decl(var, NULL); } error("Don't know how to handle %s", token_to_string(tok)); return NULL; /* non-reachable */ } List *read_toplevels(void) { List *r = make_list(); while (1) { Ast *ast = read_decl_or_func_def(); if (!ast) return r; list_push(r, ast); } list_free(globalenv->list); return r; } ================================================ FILE: sample/nqueen.c ================================================ int conflict(int board[][8], int row, int col) { for (int i = 0; i < row; i++) { if (board[i][col]) return 1; int j = row - i; if (0 < col - j + 1 && board[i][col - j]) return 1; if (col + j < 8 && board[i][col + j]) return 1; } return 0; } int print_board(int board[][8]) { for (int i = 0; i < 8; i++) { for (int j = 0; j < 8; j++) printf(board[i][j] ? "Q " : ". 
"); printf("\n"); } printf("\n\n"); } int solve(int board[][8], int row) { if (row == 8) { print_board(board); return 0; } for (int i = 0; i < 8; i++) { if (!conflict(board, row, i)) { board[row][i] = 1; solve(board, row + 1); board[row][i] = 0; } } } int main() { int board[64]; for (int i = 0; i < 64; i++) board[i] = 0; solve(board, 0); return 0; } ================================================ FILE: tests/arith.c ================================================ /* Test basic arithmetic */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int test_basic() { expect(0, 0); expect(3, 1 + 2); expect(3, 1 + 2); expect(10, 1 + 2 + 3 + 4); expect(11, 1 + 2 * 3 + 4); expect(14, 1 * 2 + 3 * 4); expect(4, 4 / 2 + 6 / 3); expect(4, 24 / 2 / 3); expect(98, 'a' + 1); int a = 0 - 1; expect(0 - 1, a); expect(0, a + 1); } int test_inc_dec() { int a = 15; expect(15, a++); expect(16, a); expect(16, a--); expect(15, a); } int test_bool() { expect(0, !1); expect(1, !0); } int test_ternary() { expect(51, (1 + 2) ? 51 : 52); expect(52, (1 - 1) ? 51 : 52); expect(26, (1 - 1) ? 51 : 52 / 2); expect(17, (1 - 0) ? 
51 / 3 : 52); } int test_logand() { expect(1, 55 && 2); expect(0, 55 && 0); expect(0, 0 && 55); } int test_bitand() { expect(3, 1 | 2); expect(1, 1 & 3); } int test_shift() { expect(8, 4 << 1); expect(3, 7 >> 1); } int main() { test_basic(); test_inc_dec(); test_bool(); test_ternary(); test_logand(); test_bitand(); test_shift(); return 0; } ================================================ FILE: tests/array.c ================================================ /* Test array */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { int a[2][3]; int *p = a; *p = 1; expect(1, *p); } int t2() { int a[2][3]; int *p = a + 1; *p = 1; int *q = a; *p = 32; expect(32, *(q + 3)); } int t3() { int a[4][5]; int *p = a; *(*(a + 1) + 2) = 62; expect(62, *(p + 7)); } int t4() { int a[3] = {1, 2, 3}; expect(1, a[0]); expect(2, a[1]); expect(3, a[2]); } int t5() { int a[2][3]; a[0][1] = 1; a[1][1] = 2; int *p = a; expect(1, p[1]); expect(2, p[4]); } int t6a(int e, int x[][3]) { expect(e, *(*(x + 1) + 1)); } int t6() { int a[2][3]; int *p = a; *(p + 4) = 65; t6a(65, a); } int t7() { int a[3 * 3]; // integer constant expression a[8] = 68; expect(68, a[8]); } int main() { t1(); t2(); t3(); t4(); t5(); t6(); t7(); return 0; } ================================================ FILE: tests/comp.c ================================================ /* Test comparison operators */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int main() { expect(1, 1 < 2); expect(0, 2 < 1); expect(1, 1 == 1); expect(0, 1 == 2); return 0; } ================================================ FILE: tests/control.c ================================================ /* Test control flow */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int testif1() { if (1) { return 'a'; } return 0; } int testif2() { 
if (0) { return 0; } return 'b'; } int testif3() { if (1) { return 'c'; } else { return 0; } return 0; } int testif4() { if (0) { return 0; } else { return 'd'; } return 0; } int testif5() { if (1) return 'e'; return 0; } int testif6() { if (0) return 0; return 'f'; } int testif7() { if (1) return 'g'; else return 0; return 0; } int testif8() { if (0) return 0; else return 'h'; return 0; } int testif9() { if (0 + 1) return 'i'; return 0; } int testif10() { if (1 - 1) return 0; return 'j'; } int testif() { expect('a', testif1()); expect('b', testif2()); expect('c', testif3()); expect('d', testif4()); expect('e', testif5()); expect('f', testif6()); expect('g', testif7()); expect('h', testif8()); expect('i', testif9()); expect('j', testif10()); } int testfor() { int i; int acc = 0; for (i = 0; i < 5; i = i + 1) { acc = acc + i; } expect(10, acc); acc = 0; for (i = 0; i < 5; i = i + 1) acc = acc + i; expect(10, acc); } int main() { testif(); testfor(); return 0; } ================================================ FILE: tests/decl.c ================================================ /* Test declaration */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { int a = 1; expect(3, a + 2); } int t2() { int a = 1; int b = 48 + 2; int c = a + b; expect(102, c * 2); } int t3() { int a[] = {55}; int *b = a; expect(55, *b); } int t4() { int a[] = {55, 67}; int *b = a + 1; expect(67, *b); } int t5() { int a[] = {20, 30, 40}; int *b = a + 1; expect(30, *b); } int t6() { int a[] = {20, 30, 40}; expect(20, *a); } int main() { t1(); t2(); t3(); t4(); t5(); t6(); return 0; } ================================================ FILE: tests/driver.sh ================================================ #!/usr/bin/env bash eval `cat .cbuild` function compile { echo "$1" > /dev/stderr echo "$1" | ./mzcc > tmp.s || echo "Failed to compile $1" if [ $? 
-ne 0 ]; then echo "Failed to compile $1" exit fi $(CBUILD) -o tmp.out tmp.s if [ $? -ne 0 ]; then echo "GCC failed: $1" exit fi } function assert_equal { if [ "$1" != "$2" ]; then echo "Test failed: $2 expected but got $1" exit fi } function test_astf { result="$(echo "$2" | ./mzcc --dump-ast -)" if [ $? -ne 0 ]; then echo "Failed to compile $2" exit fi assert_equal "$result" "$1" } function test_ast { test_astf "$1" "int f(){$2}" } function test_fail { expr="int f(){$1}" echo "$expr" | ./mzcc > /dev/null 2>&1 if [ $? -eq 0 ]; then echo "Should fail to compile, but succeded: $expr" exit fi } # Parser test_ast '(int)f(){1;}' '1;' test_ast '(int)f(){1L;}' '1L;' test_ast '(int)f(){1152921504606846976L;}' '1152921504606846976;' test_ast '(int)f(){(+ (- (+ 1 2) 3) 4);}' '1+2-3+4;' test_ast '(int)f(){(+ (+ 1 (* 2 3)) 4);}' '1+2*3+4;' test_ast '(int)f(){(+ (* 1 2) (* 3 4));}' '1*2+3*4;' test_ast '(int)f(){(+ (/ 4 2) (/ 6 3));}' '4/2+6/3;' test_ast '(int)f(){(/ (/ 24 2) 4);}' '24/2/4;' test_ast '(int)f(){(decl int a 3);}' 'int a=3;' test_ast "(int)f(){(decl char c 'a');}" "char c='a';" test_ast '(int)f(){(decl *char s "abcd");}' 'char *s="abcd";' test_ast '(int)f(){(decl [5]char s "asdf");}' 'char s[5]="asdf";' test_ast '(int)f(){(decl [5]char s "asdf");}' 'char s[]="asdf";' test_ast '(int)f(){(decl [3]int a {1,2,3});}' 'int a[3]={1,2,3};' test_ast '(int)f(){(decl [3]int a {1,2,3});}' 'int a[]={1,2,3};' test_ast '(int)f(){(decl [3][5]int a);}' 'int a[3][5];' test_ast '(int)f(){(decl [5]*int a);}' 'int *a[5];' test_ast '(int)f(){(decl int a 1);(decl int b 2);(= a (= b 3));}' 'int a=1;int b=2;a=b=3;' test_ast '(int)f(){(decl int a 3);(addr a);}' 'int a=3;&a;' test_ast '(int)f(){(decl int a 3);(deref (addr a));}' 'int a=3;*&a;' test_ast '(int)f(){(decl int a 3);(decl *int b (addr a));(deref b);}' 'int a=3;int *b=&a;*b;' test_ast '(int)f(){(if 1 {2;});}' 'if(1){2;}' test_ast '(int)f(){(if 1 {2;} {3;});}' 'if(1){2;}else{3;}' test_ast '(int)f(){(for (decl int a 1) 3 7 {5;});}' 
'for(int a=1;3;7){5;}' test_ast '(int)f(){"abcd";}' '"abcd";' test_ast "(int)f(){'c';}" "'c';" test_ast '(int)f(){(int)a();}' 'a();' test_ast '(int)f(){(int)a(1,2,3,4,5,6);}' 'a(1,2,3,4,5,6);' test_ast '(int)f(){(return 1);}' 'return 1;' test_ast '(int)f(){(< 1 2);}' '1<2;' test_ast '(int)f(){(> 1 2);}' '1>2;' test_ast '(int)f(){(== 1 2);}' '1==2;' test_ast '(int)f(){(deref (+ 1 2));}' '1[2];' test_ast '(int)f(){(decl int a 1);(++ a);}' 'int a=1;a++;' test_ast '(int)f(){(decl int a 1);(-- a);}' 'int a=1;a--;' test_ast '(int)f(){(! 1);}' '!1;' test_ast '(int)f(){(? 1 2 3);}' '1?2:3;' test_ast '(int)f(){(and 1 2);}' '1&&2;' test_ast '(int)f(){(or 1 2);}' '1||2;' test_ast '(int)f(){(& 1 2);}' '1&2;' test_ast '(int)f(){(| 1 2);}' '1|2;' test_ast '(int)f(){1.200000;}' '1.2;' test_ast '(int)f(){(+ 1.200000 1);}' '1.2+1;' test_astf '(int)f(int c){c;}' 'int f(int c){c;}' test_astf '(int)f(int c){c;}(int)g(int d){d;}' 'int f(int c){c;} int g(int d){d;}' test_astf '(decl int a 3)' 'int a=3;' test_astf '(decl (struct) a)' 'struct {} a;' test_astf '(decl (struct (int) (char)) a)' 'struct {int x; char y;} a;' test_astf '(decl (struct ([3]int)) a)' 'struct {int x[3];} a;' test_ast '(int)f(){(decl (struct (int)) a);(decl *(struct (int)) p);(deref p).x;}' 'struct tag {int x;} a; struct tag *p; p->x;' test_ast '(int)f(){(decl (struct (int)) a);a.x;}' 'struct {int x;} a; a.x;' test_fail '0abc;' test_fail '1+;' test_fail '1=2;' # & is only applicable to an lvalue test_fail '&"a";' test_fail '&1;' test_fail '&a();' echo "All tests passed" ================================================ FILE: tests/float.c ================================================ /* Test floating point */ int expect(float a, float b) { if (!(a == b)) { printf("Failed\n"); printf(" %f expected, but got %f\n", a, b); exit(1); } } int main() { expect(1.0, 1.0); expect(1.5, 1.0 + 0.5); expect(0.5, 1.0 - 0.5); expect(2.0, 1.0 * 2.0); expect(0.25, 1.0 / 4.0); expect(3.0, 1.0 + 2); expect(2.5, 5 - 2.5); expect(2.0, 
1.0 * 2); expect(0.25, 1.0 / 4); return 0; } ================================================ FILE: tests/function.c ================================================ /* Test function */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { return 77; } int t2(int a) { expect(79, a); } int t3(int a, int b, int c, int d, int e, int f) { expect(1, a); expect(2, b); expect(3, c); expect(4, d); expect(5, e); expect(6, f); } int t4a(int *p) { return *p; } int t4() { int a[] = {98}; expect(98, t4a(a)); } int t5a(int *p) { expect(99, *p); p = p + 1; expect(98, *p); p = p + 1; expect(97, *p); } int t5b(int p[]) { expect(99, *p); p = p + 1; expect(98, *p); p = p + 1; expect(97, *p); } int t5() { int a[] = {1, 2, 3}; int *p = a; *p = 99; p = p + 1; *p = 98; p = p + 1; *p = 97; t5a(a); t5b(a); } int main() { expect(77, t1()); t2(79); t3(1, 2, 3, 4, 5, 6); t4(); t5(); return 0; } ================================================ FILE: tests/global.c ================================================ /* Test global variable */ int val = 21; int a1[3]; int a2[3] = {24, 25, 26}; int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int main() { expect(21, val); val = 22; expect(22, val); a1[1] = 23; expect(23, a1[1]); expect(25, a2[1]); return 0; } ================================================ FILE: tests/long.c ================================================ /* Test long integer */ int expect(long a, long b) { if (!(a == b)) { printf("Failed\n"); printf(" %ld expected, but got %ld\n", a, b); exit(1); } } int main() { expect(10L, 10L); expect(1152921504606846976, 1152921504606846976); expect(1152921504606846977, 1152921504606846976 + 1); return 0; } ================================================ FILE: tests/pointer.c ================================================ /* Test pointer */ int expect(int a, int b) { if (!(a == b)) { 
printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { int a = 61; int *b = &a; expect(61, *b); } int t2() { char *c = "ab"; expect(97, *c); } int t3() { char *c = "ab" + 1; expect(98, *c); } int t4() { char s[] = "xyz"; char *c = s + 2; expect(122, *c); } int t5() { char s[] = "xyz"; *s = 65; expect(65, *s); } int main() { t1(); t2(); t3(); t4(); t5(); return 0; } ================================================ FILE: tests/pointer_arith.c ================================================ /* Test pointer arithmetic*/ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { char *s = "abcdefghi"; char *x = s; char *t = x + 1; expect(98, *t); } int t2() { char *s = "abcdefghi"; int *x = s; char *t = x + 1; expect(101, *t); } int t3() { char *s = "abcdefghi"; long *x = s; char *t = x + 1; expect(105, *t); } int t4() { char *s = "abcdefghi"; void *x = s; char *t = x + 1; expect(98, *t); } int main() { t1(); t2(); t3(); t4(); } ================================================ FILE: tests/scope.c ================================================ /* Test scope */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int main() { int a = 31; { int a = 64; } expect(31, a); { int a = 64; expect(64, a); } return 0; } ================================================ FILE: tests/struct.c ================================================ /* Test struct */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { struct { int a; } x; x.a = 61; expect(61, x.a); } int t2() { struct { int a; int b; } x; x.a = 61; x.b = 2; expect(63, x.a + x.b); } int t3() { struct { int a; struct { char b; int c; } y; } x; x.a = 61; x.y.b = 3; x.y.c = 3; expect(67, x.a + x.y.b + x.y.c); } int t4() { struct tag4 { int a; struct { char b; int c; } y; } x; 
struct tag4 s; s.a = 61; s.y.b = 3; s.y.c = 3; expect(67, s.a + s.y.b + s.y.c); } int t5() { struct tag5 { int a; } x; struct tag5 *p = &x; x.a = 68; expect(68, (*p).a); } int t6() { struct tag6 { int a; } x; struct tag6 *p = &x; (*p).a = 69; expect(69, x.a); } int t7() { struct tag7 { int a; int b; } x; struct tag7 *p = &x; x.b = 71; expect(71, (*p).b); } int t8() { struct tag8 { int a; int b; } x; struct tag8 *p = &x; (*p).b = 72; expect(72, x.b); } int t9() { struct tag9 { int a[3]; int b[3]; } x; x.a[0] = 73; expect(73, x.a[0]); x.b[1] = 74; expect(74, x.b[1]); expect(74, x.a[4]); } struct tag10 { int a; struct tag10a { char b; int c; } y; } v10; int t10() { v10.a = 71; v10.y.b = 3; v10.y.c = 3; expect(77, v10.a + v10.y.b + v10.y.c); } struct tag11 { int a; } v11; int t11() { struct tag11 *p = &v11; v11.a = 78; expect(78, (*p).a); expect(78, v11.a); expect(78, p->a); p->a = 79; expect(79, (*p).a); expect(79, v11.a); expect(79, p->a); } struct tag12 { int a; int b; } x; int t12() { struct tag12 a[3]; a[0].a = 83; expect(83, a[0].a); a[0].b = 84; expect(84, a[0].b); a[1].b = 85; expect(85, a[1].b); int *p = a; expect(85, p[3]); } int main() { t1(); t2(); t3(); t4(); t5(); t6(); t7(); t8(); t9(); t10(); t11(); t12(); return 0; } ================================================ FILE: tests/union.c ================================================ /* Test union */ int expect(int a, int b) { if (!(a == b)) { printf("Failed\n"); printf(" %d expected, but got %d\n", a, b); exit(1); } } int t1() { union { int a; int b; } x; x.a = 90; expect(90, x.b); } int t2() { union { char a[4]; int b; } x; x.b = 0; x.a[1] = 1; expect(256, x.b); } int t3() { union { char a[4]; int b; } x; x.a[0] = x.a[1] = x.a[2] = x.a[3] = 0; x.a[1] = 1; expect(256, x.b); } int main() { t1(); t2(); t3(); return 0; } ================================================ FILE: util.h ================================================ #ifndef MAZUCC_UTIL_H #define MAZUCC_UTIL_H #include #include #include 
#include #include "list.h" typedef struct { char *body; int nalloc, len; } String; static List *cstrings = &EMPTY_LIST; #define INIT_SIZE 8 static inline String make_string(void) { return (String){ .body = calloc(1, INIT_SIZE), .nalloc = INIT_SIZE, .len = 0, }; } static inline void realloc_body(String *s) { int newsize = s->nalloc * 2; char *body = realloc(s->body, newsize); s->body = body; s->nalloc = newsize; } static inline char *get_cstring(const String s) { char *r = s.body; list_push(cstrings, r); return r; } static inline void string_append(String *s, char c) { if (s->nalloc == (s->len + 1)) realloc_body(s); s->body[s->len++] = c; s->body[s->len] = '\0'; } static inline void string_appendf(String *s, char *fmt, ...) { va_list args; while (1) { int avail = s->nalloc - s->len; va_start(args, fmt); int written = vsnprintf(s->body + s->len, avail, fmt, args); va_end(args); if (avail <= written) { realloc_body(s); continue; } s->len += written; return; } } #define error(...) errorf(__FILE__, __LINE__, __VA_ARGS__) #define assert(expr) \ do { \ if (!(expr)) \ error("Assertion failed: " #expr); \ } while (0) static inline void errorf(char *file, int line, char *fmt, ...) 
{ fprintf(stderr, "%s:%d: ", file, line); va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); va_end(args); exit(1); } static inline char *quote_cstring(char *p) { String s = make_string(); for (; *p; p++) { if (*p == '\"' || *p == '\\') string_appendf(&s, "\\%c", *p); else if (*p == '\n') string_appendf(&s, "\\n"); else string_append(&s, *p); } return get_cstring(s); } #endif /* MAZUCC_UTIL_H */ ================================================ FILE: verbose.c ================================================ #include "mzcc.h" char *ctype_to_string(Ctype *ctype) { if (!ctype) return "(nil)"; switch (ctype->type) { case CTYPE_VOID: return "void"; case CTYPE_INT: return "int"; case CTYPE_LONG: return "long"; case CTYPE_CHAR: return "char"; case CTYPE_FLOAT: return "float"; case CTYPE_DOUBLE: return "double"; case CTYPE_PTR: { String s = make_string(); string_appendf(&s, "*%s", ctype_to_string(ctype->ptr)); return get_cstring(s); } case CTYPE_ARRAY: { String s = make_string(); string_appendf(&s, "[%d]%s", ctype->len, ctype_to_string(ctype->ptr)); return get_cstring(s); } case CTYPE_STRUCT: { String s = make_string(); string_appendf(&s, "(struct"); for (Iter i = list_iter(dict_values(ctype->fields)); !iter_end(i);) string_appendf(&s, " (%s)", ctype_to_string(iter_next(&i))); string_appendf(&s, ")"); return get_cstring(s); } default: error("Unknown ctype: %d", ctype); return NULL; /* non-reachable */ } } static void uop_to_string(String *buf, char *op, Ast *ast) { string_appendf(buf, "(%s %s)", op, ast_to_string(ast->operand)); } static void binop_to_string(String *buf, char *op, Ast *ast) { string_appendf(buf, "(%s %s %s)", op, ast_to_string(ast->left), ast_to_string(ast->right)); } static void ast_to_string_int(String *buf, Ast *ast) { if (!ast) { string_appendf(buf, "(nil)"); return; } switch (ast->type) { case AST_LITERAL: switch (ast->ctype->type) { case CTYPE_CHAR: if (ast->ival == '\n') string_appendf(buf, "'\n'"); else if 
(ast->ival == '\\') string_appendf(buf, "'\\\\'"); else string_appendf(buf, "'%c'", ast->ival); break; case CTYPE_INT: string_appendf(buf, "%d", ast->ival); break; case CTYPE_LONG: string_appendf(buf, "%ldL", ast->ival); break; case CTYPE_FLOAT: case CTYPE_DOUBLE: string_appendf(buf, "%f", ast->fval); break; default: error("internal error"); } break; case AST_STRING: string_appendf(buf, "\"%s\"", quote_cstring(ast->sval)); break; case AST_LVAR: case AST_GVAR: string_appendf(buf, "%s", ast->varname); break; case AST_FUNCALL: { string_appendf(buf, "(%s)%s(", ctype_to_string(ast->ctype), ast->fname); for (Iter i = list_iter(ast->args); !iter_end(i);) { string_appendf(buf, "%s", ast_to_string(iter_next(&i))); if (!iter_end(i)) string_appendf(buf, ","); } string_appendf(buf, ")"); break; } case AST_FUNC: { string_appendf(buf, "(%s)%s(", ctype_to_string(ast->ctype), ast->fname); for (Iter i = list_iter(ast->params); !iter_end(i);) { Ast *param = iter_next(&i); string_appendf(buf, "%s %s", ctype_to_string(param->ctype), ast_to_string(param)); if (!iter_end(i)) string_appendf(buf, ","); } string_appendf(buf, ")"); ast_to_string_int(buf, ast->body); break; } case AST_DECL: string_appendf(buf, "(decl %s %s", ctype_to_string(ast->declvar->ctype), ast->declvar->varname); if (ast->declinit) string_appendf(buf, " %s)", ast_to_string(ast->declinit)); else string_appendf(buf, ")"); break; case AST_ARRAY_INIT: string_appendf(buf, "{"); for (Iter i = list_iter(ast->arrayinit); !iter_end(i);) { ast_to_string_int(buf, iter_next(&i)); if (!iter_end(i)) string_appendf(buf, ","); } string_appendf(buf, "}"); break; case AST_IF: string_appendf(buf, "(if %s %s", ast_to_string(ast->cond), ast_to_string(ast->then)); if (ast->els) string_appendf(buf, " %s", ast_to_string(ast->els)); string_appendf(buf, ")"); break; case AST_TERNARY: string_appendf(buf, "(? 
%s %s %s)", ast_to_string(ast->cond), ast_to_string(ast->then), ast_to_string(ast->els)); break; case AST_FOR: string_appendf(buf, "(for %s %s %s ", ast_to_string(ast->forinit), ast_to_string(ast->forcond), ast_to_string(ast->forstep)); string_appendf(buf, "%s)", ast_to_string(ast->forbody)); break; case AST_RETURN: string_appendf(buf, "(return %s)", ast_to_string(ast->retval)); break; case AST_COMPOUND_STMT: { string_appendf(buf, "{"); for (Iter i = list_iter(ast->stmts); !iter_end(i);) { ast_to_string_int(buf, iter_next(&i)); string_appendf(buf, ";"); } string_appendf(buf, "}"); break; } case AST_STRUCT_REF: ast_to_string_int(buf, ast->struc); string_appendf(buf, "."); string_appendf(buf, ast->field); break; case AST_ADDR: uop_to_string(buf, "addr", ast); break; case AST_DEREF: uop_to_string(buf, "deref", ast); break; case PUNCT_INC: uop_to_string(buf, "++", ast); break; case PUNCT_DEC: uop_to_string(buf, "--", ast); break; case PUNCT_LOGAND: binop_to_string(buf, "and", ast); break; case PUNCT_LOGOR: binop_to_string(buf, "or", ast); break; case '!': uop_to_string(buf, "!", ast); break; case '&': binop_to_string(buf, "&", ast); break; case '|': binop_to_string(buf, "|", ast); break; default: { char *left = ast_to_string(ast->left); char *right = ast_to_string(ast->right); if (ast->type == PUNCT_EQ) string_appendf(buf, "(== "); else string_appendf(buf, "(%c ", ast->type); string_appendf(buf, "%s %s)", left, right); } } } char *ast_to_string(Ast *ast) { String s = make_string(); ast_to_string_int(&s, ast); return get_cstring(s); } char *token_to_string(const Token tok) { enum TokenType ttype = get_ttype(tok); if (ttype == TTYPE_NULL) return "(null)"; String s = make_string(); switch (ttype) { case TTYPE_NULL: error("internal error: unknown token type: %d", get_ttype(tok)); case TTYPE_IDENT: return get_ident(tok); case TTYPE_PUNCT: if (is_punct(tok, PUNCT_EQ)) string_appendf(&s, "=="); else string_appendf(&s, "%c", get_punct(tok)); return get_cstring(s); case 
TTYPE_CHAR: string_append(&s, get_char(tok)); return get_cstring(s); case TTYPE_NUMBER: return get_number(tok); case TTYPE_STRING: string_appendf(&s, "\"%s\"", get_strtok(tok)); return get_cstring(s); } error("internal error: unknown token type: %d", get_ttype(tok)); return NULL; /* non-reachable */ }